From d9bae32c6a32a86805dc1152196f55cf02114b93 Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Sun, 8 May 2022 23:02:47 +0800 Subject: [PATCH 0001/4335] usb: gadget: fix race when gadget driver register via ioctl commit 5f0b5f4d50fa0faa8c76ef9d42a42e8d43f98b44 upstream. The usb_gadget_register_driver can be called multi time by to threads via USB_RAW_IOCTL_RUN ioctl syscall, which will lead to multiple registrations. Call trace: driver_register+0x220/0x3a0 drivers/base/driver.c:171 usb_gadget_register_driver_owner+0xfb/0x1e0 drivers/usb/gadget/udc/core.c:1546 raw_ioctl_run drivers/usb/gadget/legacy/raw_gadget.c:513 [inline] raw_ioctl+0x1883/0x2730 drivers/usb/gadget/legacy/raw_gadget.c:1220 ioctl USB_RAW_IOCTL_RUN This routine allows two processes to register the same driver instance via ioctl syscall. which lead to a race condition. Please refer to the following scenarios. T1 T2 ------------------------------------------------------------------ usb_gadget_register_driver_owner driver_register driver_register driver_find driver_find bus_add_driver bus_add_driver priv alloced drv->p = priv; kobject_init_and_add // refcount = 1; //couldn't find an available UDC or it's busy priv alloced drv->priv = priv; kobject_init_and_add ---> refcount = 1 <------ // register success ===================== another ioctl/process ====================== driver_register driver_find k = kset_find_obj() ---> refcount = 2 <------ driver_unregister // drv->p become T2's priv ---> refcount = 1 <------ kobject_put(k) ---> refcount = 0 <------ return priv->driver; --------UAF here---------- There will be UAF in this scenario. We can fix it by adding a new STATE_DEV_REGISTERING device state to avoid double register. Reported-by: syzbot+dc7c3ca638e773db07f6@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/000000000000e66c2805de55b15a@google.com/ Reviewed-by: Andrey Konovalov Signed-off-by: Schspa Shi Link: https://lore.kernel.org/r/20220508150247.38204-1-schspa@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index d86c3a36441ee..3427ce37a5c5b 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -145,6 +145,7 @@ enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, + STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED @@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) ret = -EINVAL; goto out_unlock; } + dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_probe_driver(&dev->driver); -- GitLab From 5655455a06b8d17285df320dd1bce19d37392fa9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 Oct 2021 00:07:08 +0100 Subject: [PATCH 0002/4335] io_uring: arm poll for non-nowait files commit e74ead135bc4459f7d40b1f8edab1333a28b54e8 upstream. Don't check if we can do nowait before arming apoll, there are several reasons for that. First, we don't care much about files that don't support nowait. Second, it may be useful -- we don't want to be taking away extra workers from io-wq when it can go in some async. Even if it will go through io-wq eventually, it make difference in the numbers of workers actually used. And the last one, it's needed to clean nowait in future commits. [kernel test robot: fix unused-var] Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9d06f3cb2c8b686d970269a87986f154edb83043.1634425438.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7aad4bde92e96..b8e6398d94307 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5662,7 +5662,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) struct async_poll *apoll; struct io_poll_table ipt; __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI; - int rw; if (!req->file || !file_can_poll(req->file)) return IO_APOLL_ABORTED; @@ -5672,7 +5671,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) return IO_APOLL_ABORTED; if (def->pollin) { - rw = READ; mask |= POLLIN | POLLRDNORM; /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ @@ -5680,14 +5678,9 @@ static int io_arm_poll_handler(struct io_kiocb *req) (req->sr_msg.msg_flags & MSG_ERRQUEUE)) mask &= ~POLLIN; } else { - rw = WRITE; mask |= POLLOUT | POLLWRNORM; } - /* if we can't nonblock try, then no point in arming a poll handler */ - if (!io_file_supports_nowait(req, rw)) - return IO_APOLL_ABORTED; - apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (unlikely(!apoll)) return IO_APOLL_ABORTED; -- GitLab From fc2bee93e31bbba920e9eeba76af72264ced066f Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:07 +0200 Subject: [PATCH 0003/4335] floppy: use a statically allocated error counter commit f71f01394f742fc4558b3f9f4c7ef4c4cf3b07c8 upstream. Interrupt handler bad_flp_intr() may cause a UAF on the recently freed request just to increment the error count. There's no point keeping that one in the request anyway, and since the interrupt handler uses a static pointer to the error which cannot be kept in sync with the pending request, better make it use a static error counter that's reset for each new request. This reset now happens when entering redo_fd_request() for a new request via set_next_request(). One initial concern about a single error counter was that errors on one floppy drive could be reported on another one, but this problem is not real given that the driver uses a single drive at a time, as that PC-compatible controllers also have this limitation by using shared signals. As such the error count is always for the "current" drive. Reported-by: Minh Yuan Suggested-by: Linus Torvalds Tested-by: Denis Efremov Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 1c152b542a52d..db0b3e8982fe5 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -509,8 +509,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -530,7 +530,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1455,7 +1454,7 @@ static int interpret_errors(void) if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= drive_params[current_drive].max_errors.reporting) { + } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) @@ -2095,7 +2094,7 @@ static void bad_flp_intr(void) if (!next_valid_format(current_drive)) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(write_errors[current_drive].badness, err_count); if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); @@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2761,10 +2759,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } /* Starts or continues processing request. Will automatically unlock the @@ -2823,7 +2822,6 @@ do_request: _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); -- GitLab From 879e811a41f8d3fc9294380d4663cb72f6d00208 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:09 -0600 Subject: [PATCH 0004/4335] kernel/resource: Introduce request_mem_region_muxed() commit 27c196c7b73cb70bbed3a9df46563bab60e63415 upstream. Support for requesting muxed memory region is implemented but not currently callable as a macro. Add the request muxed memory region macro. MMIO memory accesses can be synchronized using request_mem_region() which is already available. This call will return failure if the resource is busy. The 'muxed' version of this macro will handle a busy resource by using a wait queue to retry until the resource is available. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- include/linux/ioport.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8359c50f99884..ec5f71f7135b0 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -262,6 +262,8 @@ resource_union(struct resource *r1, struct resource *r2, struct resource *r) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_muxed(start, n, name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_MUXED) #define request_mem_region_exclusive(start,n,name) \ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) -- GitLab From 5166f933da0e35efa5b241bd3dc7a787daba1d6e Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:10 -0600 Subject: [PATCH 0005/4335] i2c: piix4: Replace hardcoded memory map size with a #define commit 93102cb449780f7b4eecf713451627b78373ce49 upstream. Replace number constant with #define to improve readability and maintainability. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 8c1b31ed0c429..3ff68967034e7 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -77,6 +77,7 @@ /* SB800 constants */ #define SB800_PIIX4_SMB_IDX 0xcd6 +#define SB800_PIIX4_SMB_MAP_SIZE 2 #define KERNCZ_IMC_IDX 0x3e #define KERNCZ_IMC_DATA 0x3f @@ -290,7 +291,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, else smb_en = (aux) ? 0x28 : 0x2c; - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb")) { + if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, + "sb800_piix4_smb")) { dev_err(&PIIX4_dev->dev, "SMB base address index region 0x%x already in use.\n", SB800_PIIX4_SMB_IDX); @@ -302,7 +304,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); - release_region(SB800_PIIX4_SMB_IDX, 2); + release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); if (!smb_en) { smb_en_status = smba_en_lo & 0x10; @@ -371,7 +373,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; } } else { - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, + if (!request_muxed_region(SB800_PIIX4_SMB_IDX, + SB800_PIIX4_SMB_MAP_SIZE, "sb800_piix4_smb")) { release_region(piix4_smba, SMBIOSIZE); return -EBUSY; @@ -384,7 +387,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, SB800_PIIX4_PORT_IDX; piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; - release_region(SB800_PIIX4_SMB_IDX, 2); + release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); } dev_info(&PIIX4_dev->dev, @@ -682,7 +685,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 port; int retval; - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb")) + if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, + "sb800_piix4_smb")) return -EBUSY; /* Request the SMBUS semaphore, avoid conflicts with the IMC */ @@ -758,7 +762,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, piix4_imc_wakeup(); release: - release_region(SB800_PIIX4_SMB_IDX, 2); + release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); return retval; } -- GitLab From bdf9bed734c3981acd12e3c3e2825ae87dd44f6b Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:11 -0600 Subject: [PATCH 0006/4335] i2c: piix4: Move port I/O region request/release code into functions commit a3325d225b00889f4b7fdb25d83033cae1048a92 upstream. Move duplicated region request and release code into a function. Move is in preparation for following MMIO changes. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare [wsa: added missing curly brace] Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 48 ++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 3ff68967034e7..39c4f5ce0cd91 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -165,6 +165,24 @@ struct i2c_piix4_adapdata { u8 port; /* Port number, shifted */ }; +static int piix4_sb800_region_request(struct device *dev) +{ + if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, + "sb800_piix4_smb")) { + dev_err(dev, + "SMBus base address index region 0x%x already in use.\n", + SB800_PIIX4_SMB_IDX); + return -EBUSY; + } + + return 0; +} + +static void piix4_sb800_region_release(struct device *dev) +{ + release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); +} + static int piix4_setup(struct pci_dev *PIIX4_dev, const struct pci_device_id *id) { @@ -270,6 +288,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, unsigned short piix4_smba; u8 smba_en_lo, smba_en_hi, smb_en, smb_en_status, port_sel; u8 i2ccfg, i2ccfg_offset = 0x10; + int retval; /* SB800 and later SMBus does not support forcing address */ if (force || force_addr) { @@ -291,20 +310,16 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, else smb_en = (aux) ? 0x28 : 0x2c; - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, - "sb800_piix4_smb")) { - dev_err(&PIIX4_dev->dev, - "SMB base address index region 0x%x already in use.\n", - SB800_PIIX4_SMB_IDX); - return -EBUSY; - } + retval = piix4_sb800_region_request(&PIIX4_dev->dev); + if (retval) + return retval; outb_p(smb_en, SB800_PIIX4_SMB_IDX); smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); - release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); + piix4_sb800_region_release(&PIIX4_dev->dev); if (!smb_en) { smb_en_status = smba_en_lo & 0x10; @@ -373,11 +388,10 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; } } else { - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, - SB800_PIIX4_SMB_MAP_SIZE, - "sb800_piix4_smb")) { + retval = piix4_sb800_region_request(&PIIX4_dev->dev); + if (retval) { release_region(piix4_smba, SMBIOSIZE); - return -EBUSY; + return retval; } outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX); @@ -387,7 +401,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, SB800_PIIX4_PORT_IDX; piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; - release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); + piix4_sb800_region_release(&PIIX4_dev->dev); } dev_info(&PIIX4_dev->dev, @@ -685,9 +699,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 port; int retval; - if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, - "sb800_piix4_smb")) - return -EBUSY; + retval = piix4_sb800_region_request(&adap->dev); + if (retval) + return retval; /* Request the SMBUS semaphore, avoid conflicts with the IMC */ smbslvcnt = inb_p(SMBSLVCNT); @@ -762,7 +776,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, piix4_imc_wakeup(); release: - release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); + piix4_sb800_region_release(&adap->dev); return retval; } -- GitLab From 9a8119ddc3c5069dca15b5497db244152471fa87 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:12 -0600 Subject: [PATCH 0007/4335] i2c: piix4: Move SMBus controller base address detect into function commit 0a59a24e14e9b21dcbb6b8ea41422e2fdfa437fd upstream. Move SMBus controller base address detection into function. Refactor is in preparation for following MMIO changes. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 69 ++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 39c4f5ce0cd91..132ded733781c 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -282,11 +282,51 @@ static int piix4_setup(struct pci_dev *PIIX4_dev, return piix4_smba; } +static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev, + u8 smb_en, + u8 aux, + u8 *smb_en_status, + unsigned short *piix4_smba) +{ + u8 smba_en_lo; + u8 smba_en_hi; + int retval; + + retval = piix4_sb800_region_request(&PIIX4_dev->dev); + if (retval) + return retval; + + outb_p(smb_en, SB800_PIIX4_SMB_IDX); + smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); + outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); + smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); + + piix4_sb800_region_release(&PIIX4_dev->dev); + + if (!smb_en) { + *smb_en_status = smba_en_lo & 0x10; + *piix4_smba = smba_en_hi << 8; + if (aux) + *piix4_smba |= 0x20; + } else { + *smb_en_status = smba_en_lo & 0x01; + *piix4_smba = ((smba_en_hi << 8) | smba_en_lo) & 0xffe0; + } + + if (!*smb_en_status) { + dev_err(&PIIX4_dev->dev, + "SMBus Host Controller not enabled!\n"); + return -ENODEV; + } + + return 0; +} + static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, const struct pci_device_id *id, u8 aux) { unsigned short piix4_smba; - u8 smba_en_lo, smba_en_hi, smb_en, smb_en_status, port_sel; + u8 smb_en, smb_en_status, port_sel; u8 i2ccfg, i2ccfg_offset = 0x10; int retval; @@ -310,33 +350,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, else smb_en = (aux) ? 0x28 : 0x2c; - retval = piix4_sb800_region_request(&PIIX4_dev->dev); + retval = piix4_setup_sb800_smba(PIIX4_dev, smb_en, aux, &smb_en_status, + &piix4_smba); + if (retval) return retval; - outb_p(smb_en, SB800_PIIX4_SMB_IDX); - smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); - outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); - smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); - - piix4_sb800_region_release(&PIIX4_dev->dev); - - if (!smb_en) { - smb_en_status = smba_en_lo & 0x10; - piix4_smba = smba_en_hi << 8; - if (aux) - piix4_smba |= 0x20; - } else { - smb_en_status = smba_en_lo & 0x01; - piix4_smba = ((smba_en_hi << 8) | smba_en_lo) & 0xffe0; - } - - if (!smb_en_status) { - dev_err(&PIIX4_dev->dev, - "SMBus Host Controller not enabled!\n"); - return -ENODEV; - } - if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) return -ENODEV; -- GitLab From 08bc26f28bcbef4b96c94205a4cee5a763514279 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:13 -0600 Subject: [PATCH 0008/4335] i2c: piix4: Move SMBus port selection into function commit fbafbd51bff52cb3a920fd98d4dae2a78dd433d0 upstream. Move port selection code into a separate function. Refactor is in preparation for following MMIO changes. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 132ded733781c..efe51f67fdd6b 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -698,6 +698,20 @@ static void piix4_imc_wakeup(void) release_region(KERNCZ_IMC_IDX, 2); } +static int piix4_sb800_port_sel(u8 port) +{ + u8 smba_en_lo, val; + + outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); + smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); + + val = (smba_en_lo & ~piix4_port_mask_sb800) | port; + if (smba_en_lo != val) + outb_p(val, SB800_PIIX4_SMB_IDX + 1); + + return (smba_en_lo & piix4_port_mask_sb800); +} + /* * Handles access to multiple SMBus ports on the SB800. * The port is selected by bits 2:1 of the smb_en register (0x2c). @@ -714,8 +728,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, unsigned short piix4_smba = adapdata->smba; int retries = MAX_TIMEOUT; int smbslvcnt; - u8 smba_en_lo; - u8 port; + u8 prev_port; int retval; retval = piix4_sb800_region_request(&adap->dev); @@ -775,18 +788,12 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, } } - outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); - smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); - - port = adapdata->port; - if ((smba_en_lo & piix4_port_mask_sb800) != port) - outb_p((smba_en_lo & ~piix4_port_mask_sb800) | port, - SB800_PIIX4_SMB_IDX + 1); + prev_port = piix4_sb800_port_sel(adapdata->port); retval = piix4_access(adap, addr, flags, read_write, command, size, data); - outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1); + piix4_sb800_port_sel(prev_port); /* Release the semaphore */ outb_p(smbslvcnt | 0x20, SMBSLVCNT); -- GitLab From 4b965566ca26e83553d92b8c57050e5d59911806 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:14 -0600 Subject: [PATCH 0009/4335] i2c: piix4: Add EFCH MMIO support to region request and release commit 7c148722d074c29fb998578eea5de3c14b9608c9 upstream. EFCH cd6h/cd7h port I/O may no longer be available on later AMD processors and it is recommended to use MMIO instead. Update the request and release functions to support MMIO. MMIO request/release and mmapping require details during cleanup. Add a MMIO configuration structure containing resource and vaddress details for mapping the region, accessing the region, and releasing the region. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare [wsa: rebased after fixup in previous patch] Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 66 +++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index efe51f67fdd6b..d06998f7b0314 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -98,6 +98,9 @@ #define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18 #define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3 +#define SB800_PIIX4_FCH_PM_ADDR 0xFED80300 +#define SB800_PIIX4_FCH_PM_SIZE 8 + /* insmod parameters */ /* If force is set to anything different from 0, we forcibly enable the @@ -156,6 +159,12 @@ static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = { }; static const char *piix4_aux_port_name_sb800 = " port 1"; +struct sb800_mmio_cfg { + void __iomem *addr; + struct resource *res; + bool use_mmio; +}; + struct i2c_piix4_adapdata { unsigned short smba; @@ -163,10 +172,40 @@ struct i2c_piix4_adapdata { bool sb800_main; bool notify_imc; u8 port; /* Port number, shifted */ + struct sb800_mmio_cfg mmio_cfg; }; -static int piix4_sb800_region_request(struct device *dev) +static int piix4_sb800_region_request(struct device *dev, + struct sb800_mmio_cfg *mmio_cfg) { + if (mmio_cfg->use_mmio) { + struct resource *res; + void __iomem *addr; + + res = request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE, + "sb800_piix4_smb"); + if (!res) { + dev_err(dev, + "SMBus base address memory region 0x%x already in use.\n", + SB800_PIIX4_FCH_PM_ADDR); + return -EBUSY; + } + + addr = ioremap(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE); + if (!addr) { + release_resource(res); + dev_err(dev, "SMBus base address mapping failed.\n"); + return -ENOMEM; + } + + mmio_cfg->res = res; + mmio_cfg->addr = addr; + + return 0; + } + if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE, "sb800_piix4_smb")) { dev_err(dev, @@ -178,8 +217,15 @@ static int piix4_sb800_region_request(struct device *dev) return 0; } -static void piix4_sb800_region_release(struct device *dev) +static void piix4_sb800_region_release(struct device *dev, + struct sb800_mmio_cfg *mmio_cfg) { + if (mmio_cfg->use_mmio) { + iounmap(mmio_cfg->addr); + release_resource(mmio_cfg->res); + return; + } + release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); } @@ -288,11 +334,13 @@ static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev, u8 *smb_en_status, unsigned short *piix4_smba) { + struct sb800_mmio_cfg mmio_cfg; u8 smba_en_lo; u8 smba_en_hi; int retval; - retval = piix4_sb800_region_request(&PIIX4_dev->dev); + mmio_cfg.use_mmio = 0; + retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg); if (retval) return retval; @@ -301,7 +349,7 @@ static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev, outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); - piix4_sb800_region_release(&PIIX4_dev->dev); + piix4_sb800_region_release(&PIIX4_dev->dev, &mmio_cfg); if (!smb_en) { *smb_en_status = smba_en_lo & 0x10; @@ -328,6 +376,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, unsigned short piix4_smba; u8 smb_en, smb_en_status, port_sel; u8 i2ccfg, i2ccfg_offset = 0x10; + struct sb800_mmio_cfg mmio_cfg; int retval; /* SB800 and later SMBus does not support forcing address */ @@ -407,7 +456,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; } } else { - retval = piix4_sb800_region_request(&PIIX4_dev->dev); + mmio_cfg.use_mmio = 0; + retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg); if (retval) { release_region(piix4_smba, SMBIOSIZE); return retval; @@ -420,7 +470,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, SB800_PIIX4_PORT_IDX; piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; - piix4_sb800_region_release(&PIIX4_dev->dev); + piix4_sb800_region_release(&PIIX4_dev->dev, &mmio_cfg); } dev_info(&PIIX4_dev->dev, @@ -731,7 +781,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 prev_port; int retval; - retval = piix4_sb800_region_request(&adap->dev); + retval = piix4_sb800_region_request(&adap->dev, &adapdata->mmio_cfg); if (retval) return retval; @@ -802,7 +852,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, piix4_imc_wakeup(); release: - piix4_sb800_region_release(&adap->dev); + piix4_sb800_region_release(&adap->dev, &adapdata->mmio_cfg); return retval; } -- GitLab From c4194b266bf7ef8ac5c38394b1427255598fb084 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:15 -0600 Subject: [PATCH 0010/4335] i2c: piix4: Add EFCH MMIO support to SMBus base address detect commit 46967bc1ee93acd1d8953c87dc16f43de4076f93 upstream. The EFCH SMBus controller's base address is determined using details in FCH::PM::DECODEEN[smbusasfiobase] and FCH::PM::DECODEEN[smbusasfioen].These register fields were accessed using cd6h/cd7h port I/O. cd6h/cd7h port I/O is no longer available in later AMD processors. Change base address detection to use MMIO instead of port I/O cd6h/cd7h. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index d06998f7b0314..7f312177eb279 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -344,10 +344,15 @@ static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev, if (retval) return retval; - outb_p(smb_en, SB800_PIIX4_SMB_IDX); - smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); - outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); - smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); + if (mmio_cfg.use_mmio) { + smba_en_lo = ioread8(mmio_cfg.addr); + smba_en_hi = ioread8(mmio_cfg.addr + 1); + } else { + outb_p(smb_en, SB800_PIIX4_SMB_IDX); + smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); + outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX); + smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1); + } piix4_sb800_region_release(&PIIX4_dev->dev, &mmio_cfg); -- GitLab From d46b4ff3bb0bf707253ccba412ebf9a6b3aa1433 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:16 -0600 Subject: [PATCH 0011/4335] i2c: piix4: Add EFCH MMIO support for SMBus port select commit 381a3083c6747ae5cdbef9b176d57d1b966db49f upstream. AMD processors include registers capable of selecting between 2 SMBus ports. Port selection is made during each user access by writing to FCH::PM::DECODEEN[smbus0sel]. Change the driver to use MMIO during SMBus port selection because cd6h/cd7h port I/O is not available on later AMD processors. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 7f312177eb279..4789fc9ad2705 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -753,10 +753,19 @@ static void piix4_imc_wakeup(void) release_region(KERNCZ_IMC_IDX, 2); } -static int piix4_sb800_port_sel(u8 port) +static int piix4_sb800_port_sel(u8 port, struct sb800_mmio_cfg *mmio_cfg) { u8 smba_en_lo, val; + if (mmio_cfg->use_mmio) { + smba_en_lo = ioread8(mmio_cfg->addr + piix4_port_sel_sb800); + val = (smba_en_lo & ~piix4_port_mask_sb800) | port; + if (smba_en_lo != val) + iowrite8(val, mmio_cfg->addr + piix4_port_sel_sb800); + + return (smba_en_lo & piix4_port_mask_sb800); + } + outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); @@ -843,12 +852,12 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, } } - prev_port = piix4_sb800_port_sel(adapdata->port); + prev_port = piix4_sb800_port_sel(adapdata->port, &adapdata->mmio_cfg); retval = piix4_access(adap, addr, flags, read_write, command, size, data); - piix4_sb800_port_sel(prev_port); + piix4_sb800_port_sel(prev_port, &adapdata->mmio_cfg); /* Release the semaphore */ outb_p(smbslvcnt | 0x20, SMBSLVCNT); -- GitLab From 5996d3601e774f7875537e5ba7aea0abf77441b0 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:17 -0600 Subject: [PATCH 0012/4335] i2c: piix4: Enable EFCH MMIO for Family 17h+ commit 6cf72f41808ab5db1d7718b999b3ff0166e67e45 upstream. Enable EFCH MMIO using check for SMBus PCI revision ID value 0x51 or greater. This PCI revision ID check will enable family 17h and future AMD processors with the same EFCH SMBus controller HW. Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 4789fc9ad2705..ac8e7d60672a1 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -229,6 +229,18 @@ static void piix4_sb800_region_release(struct device *dev, release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE); } +static bool piix4_sb800_use_mmio(struct pci_dev *PIIX4_dev) +{ + /* + * cd6h/cd7h port I/O accesses can be disabled on AMD processors + * w/ SMBus PCI revision ID 0x51 or greater. MMIO is supported on + * the same processors and is the recommended access method. + */ + return (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD && + PIIX4_dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS && + PIIX4_dev->revision >= 0x51); +} + static int piix4_setup(struct pci_dev *PIIX4_dev, const struct pci_device_id *id) { @@ -339,7 +351,7 @@ static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev, u8 smba_en_hi; int retval; - mmio_cfg.use_mmio = 0; + mmio_cfg.use_mmio = piix4_sb800_use_mmio(PIIX4_dev); retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg); if (retval) return retval; @@ -461,7 +473,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; } } else { - mmio_cfg.use_mmio = 0; + mmio_cfg.use_mmio = piix4_sb800_use_mmio(PIIX4_dev); retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg); if (retval) { release_region(piix4_smba, SMBIOSIZE); @@ -944,6 +956,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, return -ENOMEM; } + adapdata->mmio_cfg.use_mmio = piix4_sb800_use_mmio(dev); adapdata->smba = smba; adapdata->sb800_main = sb800_main; adapdata->port = port << piix4_port_shift_sb800; -- GitLab From 8ed3e7523df3176d867140e44c9a17c501097947 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 2 Feb 2022 09:35:22 -0600 Subject: [PATCH 0013/4335] Watchdog: sp5100_tco: Move timer initialization into function commit abd71a948f7aab47ca49d3e7fe6afa6c48c8aae0 upstream. Refactor driver's timer initialization into new function. This is needed inorder to support adding new device layouts while using common timer initialization. Co-developed-by: Robert Richter Signed-off-by: Robert Richter Signed-off-by: Terry Bowman Tested-by: Jean Delvare Reviewed-by: Jean Delvare Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220202153525.1693378-2-terry.bowman@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sp5100_tco.c | 65 +++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index a730ecbf78cd5..15d3f093daa8a 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -215,6 +215,41 @@ static u32 sp5100_tco_read_pm_reg32(u8 index) return val; } +static int sp5100_tco_timer_init(struct sp5100_tco *tco) +{ + struct watchdog_device *wdd = &tco->wdd; + struct device *dev = wdd->parent; + u32 val; + + val = readl(SP5100_WDT_CONTROL(tco->tcobase)); + if (val & SP5100_WDT_DISABLED) { + dev_err(dev, "Watchdog hardware is disabled\n"); + return -ENODEV; + } + + /* + * Save WatchDogFired status, because WatchDogFired flag is + * cleared here. + */ + if (val & SP5100_WDT_FIRED) + wdd->bootstatus = WDIOF_CARDRESET; + + /* Set watchdog action to reset the system */ + val &= ~SP5100_WDT_ACTION_RESET; + writel(val, SP5100_WDT_CONTROL(tco->tcobase)); + + /* Set a reasonable heartbeat before we stop the timer */ + tco_timer_set_timeout(wdd, wdd->timeout); + + /* + * Stop the TCO before we change anything so we don't race with + * a zeroed timer. + */ + tco_timer_stop(wdd); + + return 0; +} + static int sp5100_tco_setupdevice(struct device *dev, struct watchdog_device *wdd) { @@ -340,35 +375,7 @@ static int sp5100_tco_setupdevice(struct device *dev, /* Setup the watchdog timer */ tco_timer_enable(tco); - val = readl(SP5100_WDT_CONTROL(tco->tcobase)); - if (val & SP5100_WDT_DISABLED) { - dev_err(dev, "Watchdog hardware is disabled\n"); - ret = -ENODEV; - goto unreg_region; - } - - /* - * Save WatchDogFired status, because WatchDogFired flag is - * cleared here. - */ - if (val & SP5100_WDT_FIRED) - wdd->bootstatus = WDIOF_CARDRESET; - /* Set watchdog action to reset the system */ - val &= ~SP5100_WDT_ACTION_RESET; - writel(val, SP5100_WDT_CONTROL(tco->tcobase)); - - /* Set a reasonable heartbeat before we stop the timer */ - tco_timer_set_timeout(wdd, wdd->timeout); - - /* - * Stop the TCO before we change anything so we don't race with - * a zeroed timer. - */ - tco_timer_stop(wdd); - - release_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE); - - return 0; + ret = sp5100_tco_timer_init(tco); unreg_region: release_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE); -- GitLab From 3bb1b58c53b0018289540a255f9f4caeff58bcc6 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 2 Feb 2022 09:35:23 -0600 Subject: [PATCH 0014/4335] Watchdog: sp5100_tco: Refactor MMIO base address initialization commit 1f182aca230086d4a4469c0f9136a6ea762d6385 upstream. Combine MMIO base address and alternate base address detection. Combine based on layout type. This will simplify the function by eliminating a switch case. Move existing request/release code into functions. This currently only supports port I/O request/release. The move into a separate function will make it ready for adding MMIO region support. Co-developed-by: Robert Richter Signed-off-by: Robert Richter Signed-off-by: Terry Bowman Tested-by: Jean Delvare Reviewed-by: Jean Delvare Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220202153525.1693378-3-terry.bowman@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sp5100_tco.c | 155 ++++++++++++++++++---------------- drivers/watchdog/sp5100_tco.h | 1 + 2 files changed, 82 insertions(+), 74 deletions(-) diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 15d3f093daa8a..449cc983d580f 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -215,6 +215,55 @@ static u32 sp5100_tco_read_pm_reg32(u8 index) return val; } +static u32 sp5100_tco_request_region(struct device *dev, + u32 mmio_addr, + const char *dev_name) +{ + if (!devm_request_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE, + dev_name)) { + dev_dbg(dev, "MMIO address 0x%08x already in use\n", mmio_addr); + return 0; + } + + return mmio_addr; +} + +static u32 sp5100_tco_prepare_base(struct sp5100_tco *tco, + u32 mmio_addr, + u32 alt_mmio_addr, + const char *dev_name) +{ + struct device *dev = tco->wdd.parent; + + dev_dbg(dev, "Got 0x%08x from SBResource_MMIO register\n", mmio_addr); + + if (!mmio_addr && !alt_mmio_addr) + return -ENODEV; + + /* Check for MMIO address and alternate MMIO address conflicts */ + if (mmio_addr) + mmio_addr = sp5100_tco_request_region(dev, mmio_addr, dev_name); + + if (!mmio_addr && alt_mmio_addr) + mmio_addr = sp5100_tco_request_region(dev, alt_mmio_addr, dev_name); + + if (!mmio_addr) { + dev_err(dev, "Failed to reserve MMIO or alternate MMIO region\n"); + return -EBUSY; + } + + tco->tcobase = devm_ioremap(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE); + if (!tco->tcobase) { + dev_err(dev, "MMIO address 0x%08x failed mapping\n", mmio_addr); + devm_release_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE); + return -ENOMEM; + } + + dev_info(dev, "Using 0x%08x for watchdog MMIO address\n", mmio_addr); + + return 0; +} + static int sp5100_tco_timer_init(struct sp5100_tco *tco) { struct watchdog_device *wdd = &tco->wdd; @@ -256,6 +305,7 @@ static int sp5100_tco_setupdevice(struct device *dev, struct sp5100_tco *tco = watchdog_get_drvdata(wdd); const char *dev_name; u32 mmio_addr = 0, val; + u32 alt_mmio_addr = 0; int ret; /* Request the IO ports used by this driver */ @@ -274,11 +324,32 @@ static int sp5100_tco_setupdevice(struct device *dev, dev_name = SP5100_DEVNAME; mmio_addr = sp5100_tco_read_pm_reg32(SP5100_PM_WATCHDOG_BASE) & 0xfffffff8; + + /* + * Secondly, find the watchdog timer MMIO address + * from SBResource_MMIO register. + */ + + /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */ + pci_read_config_dword(sp5100_tco_pci, + SP5100_SB_RESOURCE_MMIO_BASE, + &val); + + /* Verify MMIO is enabled and using bar0 */ + if ((val & SB800_ACPI_MMIO_MASK) == SB800_ACPI_MMIO_DECODE_EN) + alt_mmio_addr = (val & ~0xfff) + SB800_PM_WDT_MMIO_OFFSET; break; case sb800: dev_name = SB800_DEVNAME; mmio_addr = sp5100_tco_read_pm_reg32(SB800_PM_WATCHDOG_BASE) & 0xfffffff8; + + /* Read SBResource_MMIO from AcpiMmioEn(PM_Reg: 24h) */ + val = sp5100_tco_read_pm_reg32(SB800_PM_ACPI_MMIO_EN); + + /* Verify MMIO is enabled and using bar0 */ + if ((val & SB800_ACPI_MMIO_MASK) == SB800_ACPI_MMIO_DECODE_EN) + alt_mmio_addr = (val & ~0xfff) + SB800_PM_WDT_MMIO_OFFSET; break; case efch: dev_name = SB800_DEVNAME; @@ -297,87 +368,23 @@ static int sp5100_tco_setupdevice(struct device *dev, val = sp5100_tco_read_pm_reg8(EFCH_PM_DECODEEN); if (val & EFCH_PM_DECODEEN_WDT_TMREN) mmio_addr = EFCH_PM_WDT_ADDR; + + val = sp5100_tco_read_pm_reg8(EFCH_PM_ISACONTROL); + if (val & EFCH_PM_ISACONTROL_MMIOEN) + alt_mmio_addr = EFCH_PM_ACPI_MMIO_ADDR + + EFCH_PM_ACPI_MMIO_WDT_OFFSET; break; default: return -ENODEV; } - /* Check MMIO address conflict */ - if (!mmio_addr || - !devm_request_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE, - dev_name)) { - if (mmio_addr) - dev_dbg(dev, "MMIO address 0x%08x already in use\n", - mmio_addr); - switch (tco->tco_reg_layout) { - case sp5100: - /* - * Secondly, Find the watchdog timer MMIO address - * from SBResource_MMIO register. - */ - /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */ - pci_read_config_dword(sp5100_tco_pci, - SP5100_SB_RESOURCE_MMIO_BASE, - &mmio_addr); - if ((mmio_addr & (SB800_ACPI_MMIO_DECODE_EN | - SB800_ACPI_MMIO_SEL)) != - SB800_ACPI_MMIO_DECODE_EN) { - ret = -ENODEV; - goto unreg_region; - } - mmio_addr &= ~0xFFF; - mmio_addr += SB800_PM_WDT_MMIO_OFFSET; - break; - case sb800: - /* Read SBResource_MMIO from AcpiMmioEn(PM_Reg: 24h) */ - mmio_addr = - sp5100_tco_read_pm_reg32(SB800_PM_ACPI_MMIO_EN); - if ((mmio_addr & (SB800_ACPI_MMIO_DECODE_EN | - SB800_ACPI_MMIO_SEL)) != - SB800_ACPI_MMIO_DECODE_EN) { - ret = -ENODEV; - goto unreg_region; - } - mmio_addr &= ~0xFFF; - mmio_addr += SB800_PM_WDT_MMIO_OFFSET; - break; - case efch: - val = sp5100_tco_read_pm_reg8(EFCH_PM_ISACONTROL); - if (!(val & EFCH_PM_ISACONTROL_MMIOEN)) { - ret = -ENODEV; - goto unreg_region; - } - mmio_addr = EFCH_PM_ACPI_MMIO_ADDR + - EFCH_PM_ACPI_MMIO_WDT_OFFSET; - break; - } - dev_dbg(dev, "Got 0x%08x from SBResource_MMIO register\n", - mmio_addr); - if (!devm_request_mem_region(dev, mmio_addr, - SP5100_WDT_MEM_MAP_SIZE, - dev_name)) { - dev_dbg(dev, "MMIO address 0x%08x already in use\n", - mmio_addr); - ret = -EBUSY; - goto unreg_region; - } - } - - tco->tcobase = devm_ioremap(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE); - if (!tco->tcobase) { - dev_err(dev, "failed to get tcobase address\n"); - ret = -ENOMEM; - goto unreg_region; + ret = sp5100_tco_prepare_base(tco, mmio_addr, alt_mmio_addr, dev_name); + if (!ret) { + /* Setup the watchdog timer */ + tco_timer_enable(tco); + ret = sp5100_tco_timer_init(tco); } - dev_info(dev, "Using 0x%08x for watchdog MMIO address\n", mmio_addr); - - /* Setup the watchdog timer */ - tco_timer_enable(tco); - - ret = sp5100_tco_timer_init(tco); - -unreg_region: release_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE); return ret; } diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index adf015aa4126f..daee872f9b712 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -58,6 +58,7 @@ #define SB800_PM_WATCHDOG_SECOND_RES GENMASK(1, 0) #define SB800_ACPI_MMIO_DECODE_EN BIT(0) #define SB800_ACPI_MMIO_SEL BIT(1) +#define SB800_ACPI_MMIO_MASK GENMASK(1, 0) #define SB800_PM_WDT_MMIO_OFFSET 0xB00 -- GitLab From b4c0f1600df43245c8c3425dbd9426fdfba6c4b2 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 2 Feb 2022 09:35:24 -0600 Subject: [PATCH 0015/4335] Watchdog: sp5100_tco: Add initialization using EFCH MMIO commit 0578fff4aae5bce3f09875f58e68e9ffbab8daf5 upstream. cd6h/cd7h port I/O can be disabled on recent AMD hardware. Read accesses to disabled cd6h/cd7h port I/O will return F's and written data is dropped. It is recommended to replace the cd6h/cd7h port I/O with MMIO. Co-developed-by: Robert Richter Signed-off-by: Robert Richter Signed-off-by: Terry Bowman Tested-by: Jean Delvare Reviewed-by: Jean Delvare Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220202153525.1693378-4-terry.bowman@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sp5100_tco.c | 100 +++++++++++++++++++++++++++++++++- drivers/watchdog/sp5100_tco.h | 5 ++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 449cc983d580f..4d72af373c0d9 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -48,7 +48,7 @@ /* internal variables */ enum tco_reg_layout { - sp5100, sb800, efch + sp5100, sb800, efch, efch_mmio }; struct sp5100_tco { @@ -201,6 +201,8 @@ static void tco_timer_enable(struct sp5100_tco *tco) ~EFCH_PM_WATCHDOG_DISABLE, EFCH_PM_DECODEEN_SECOND_RES); break; + default: + break; } } @@ -299,6 +301,99 @@ static int sp5100_tco_timer_init(struct sp5100_tco *tco) return 0; } +static u8 efch_read_pm_reg8(void __iomem *addr, u8 index) +{ + return readb(addr + index); +} + +static void efch_update_pm_reg8(void __iomem *addr, u8 index, u8 reset, u8 set) +{ + u8 val; + + val = readb(addr + index); + val &= reset; + val |= set; + writeb(val, addr + index); +} + +static void tco_timer_enable_mmio(void __iomem *addr) +{ + efch_update_pm_reg8(addr, EFCH_PM_DECODEEN3, + ~EFCH_PM_WATCHDOG_DISABLE, + EFCH_PM_DECODEEN_SECOND_RES); +} + +static int sp5100_tco_setupdevice_mmio(struct device *dev, + struct watchdog_device *wdd) +{ + struct sp5100_tco *tco = watchdog_get_drvdata(wdd); + const char *dev_name = SB800_DEVNAME; + u32 mmio_addr = 0, alt_mmio_addr = 0; + struct resource *res; + void __iomem *addr; + int ret; + u32 val; + + res = request_mem_region_muxed(EFCH_PM_ACPI_MMIO_PM_ADDR, + EFCH_PM_ACPI_MMIO_PM_SIZE, + "sp5100_tco"); + + if (!res) { + dev_err(dev, + "Memory region 0x%08x already in use\n", + EFCH_PM_ACPI_MMIO_PM_ADDR); + return -EBUSY; + } + + addr = ioremap(EFCH_PM_ACPI_MMIO_PM_ADDR, EFCH_PM_ACPI_MMIO_PM_SIZE); + if (!addr) { + dev_err(dev, "Address mapping failed\n"); + ret = -ENOMEM; + goto out; + } + + /* + * EFCH_PM_DECODEEN_WDT_TMREN is dual purpose. This bitfield + * enables sp5100_tco register MMIO space decoding. The bitfield + * also starts the timer operation. Enable if not already enabled. + */ + val = efch_read_pm_reg8(addr, EFCH_PM_DECODEEN); + if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) { + efch_update_pm_reg8(addr, EFCH_PM_DECODEEN, 0xff, + EFCH_PM_DECODEEN_WDT_TMREN); + } + + /* Error if the timer could not be enabled */ + val = efch_read_pm_reg8(addr, EFCH_PM_DECODEEN); + if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) { + dev_err(dev, "Failed to enable the timer\n"); + ret = -EFAULT; + goto out; + } + + mmio_addr = EFCH_PM_WDT_ADDR; + + /* Determine alternate MMIO base address */ + val = efch_read_pm_reg8(addr, EFCH_PM_ISACONTROL); + if (val & EFCH_PM_ISACONTROL_MMIOEN) + alt_mmio_addr = EFCH_PM_ACPI_MMIO_ADDR + + EFCH_PM_ACPI_MMIO_WDT_OFFSET; + + ret = sp5100_tco_prepare_base(tco, mmio_addr, alt_mmio_addr, dev_name); + if (!ret) { + tco_timer_enable_mmio(addr); + ret = sp5100_tco_timer_init(tco); + } + +out: + if (addr) + iounmap(addr); + + release_resource(res); + + return ret; +} + static int sp5100_tco_setupdevice(struct device *dev, struct watchdog_device *wdd) { @@ -308,6 +403,9 @@ static int sp5100_tco_setupdevice(struct device *dev, u32 alt_mmio_addr = 0; int ret; + if (tco->tco_reg_layout == efch_mmio) + return sp5100_tco_setupdevice_mmio(dev, wdd); + /* Request the IO ports used by this driver */ if (!request_muxed_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE, "sp5100_tco")) { diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index daee872f9b712..8ca1b215e3cef 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -83,4 +83,9 @@ #define EFCH_PM_ISACONTROL_MMIOEN BIT(1) #define EFCH_PM_ACPI_MMIO_ADDR 0xfed80000 +#define EFCH_PM_ACPI_MMIO_PM_OFFSET 0x00000300 #define EFCH_PM_ACPI_MMIO_WDT_OFFSET 0x00000b00 + +#define EFCH_PM_ACPI_MMIO_PM_ADDR (EFCH_PM_ACPI_MMIO_ADDR + \ + EFCH_PM_ACPI_MMIO_PM_OFFSET) +#define EFCH_PM_ACPI_MMIO_PM_SIZE 8 -- GitLab From 486bcceeed0423232f502add0333f70b0a659015 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 2 Feb 2022 09:35:25 -0600 Subject: [PATCH 0016/4335] Watchdog: sp5100_tco: Enable Family 17h+ CPUs commit 826270373f17fd8ebd10753ca0a5fd2ceb1dc38e upstream. The driver currently uses a CPU family match of 17h to determine EFCH_PM_DECODEEN_WDT_TMREN register support. This family check will not support future AMD CPUs and instead will require driver updates to add support. Remove the family 17h family check and add a check for SMBus PCI revision ID 0x51 or greater. The MMIO access method has been available since at least SMBus controllers using PCI revision 0x51. This revision check will support family 17h and future AMD processors including EFCH functionality without requiring driver changes. Co-developed-by: Robert Richter Signed-off-by: Robert Richter Signed-off-by: Terry Bowman Tested-by: Jean Delvare Reviewed-by: Jean Delvare Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220202153525.1693378-5-terry.bowman@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sp5100_tco.c | 16 ++++------------ drivers/watchdog/sp5100_tco.h | 1 + 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 4d72af373c0d9..4820af929a826 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -85,6 +85,10 @@ static enum tco_reg_layout tco_reg_layout(struct pci_dev *dev) dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS && dev->revision < 0x40) { return sp5100; + } else if (dev->vendor == PCI_VENDOR_ID_AMD && + sp5100_tco_pci->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS && + sp5100_tco_pci->revision >= AMD_ZEN_SMBUS_PCI_REV) { + return efch_mmio; } else if (dev->vendor == PCI_VENDOR_ID_AMD && ((dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS && dev->revision >= 0x41) || @@ -451,18 +455,6 @@ static int sp5100_tco_setupdevice(struct device *dev, break; case efch: dev_name = SB800_DEVNAME; - /* - * On Family 17h devices, the EFCH_PM_DECODEEN_WDT_TMREN bit of - * EFCH_PM_DECODEEN not only enables the EFCH_PM_WDT_ADDR memory - * region, it also enables the watchdog itself. - */ - if (boot_cpu_data.x86 == 0x17) { - val = sp5100_tco_read_pm_reg8(EFCH_PM_DECODEEN); - if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) { - sp5100_tco_update_pm_reg8(EFCH_PM_DECODEEN, 0xff, - EFCH_PM_DECODEEN_WDT_TMREN); - } - } val = sp5100_tco_read_pm_reg8(EFCH_PM_DECODEEN); if (val & EFCH_PM_DECODEEN_WDT_TMREN) mmio_addr = EFCH_PM_WDT_ADDR; diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index 8ca1b215e3cef..6a0986d2c94b7 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -89,3 +89,4 @@ #define EFCH_PM_ACPI_MMIO_PM_ADDR (EFCH_PM_ACPI_MMIO_ADDR + \ EFCH_PM_ACPI_MMIO_PM_OFFSET) #define EFCH_PM_ACPI_MMIO_PM_SIZE 8 +#define AMD_ZEN_SMBUS_PCI_REV 0x51 -- GitLab From a122529082212ebb1adf358f43844f6823aa7ac3 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 0017/4335] mm/kfence: reset PG_slab and memcg_data before freeing __kfence_pool commit 2839b0999c20c9f6bf353849c69370e121e2fa1a upstream. When kfence fails to initialize kfence pool, it frees the pool. But it does not reset memcg_data and PG_slab flag. Below is a BUG because of this. Let's fix it by resetting memcg_data and PG_slab flag before free. [ 0.089149] BUG: Bad page state in process swapper/0 pfn:3d8e06 [ 0.089149] page:ffffea46cf638180 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x3d8e06 [ 0.089150] memcg:ffffffff94a475d1 [ 0.089150] flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) [ 0.089151] raw: 0017ffffc0000200 ffffea46cf638188 ffffea46cf638188 0000000000000000 [ 0.089152] raw: 0000000000000000 0000000000000000 00000000ffffffff ffffffff94a475d1 [ 0.089152] page dumped because: page still charged to cgroup [ 0.089153] Modules linked in: [ 0.089153] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G B W 5.18.0-rc1+ #965 [ 0.089154] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 0.089154] Call Trace: [ 0.089155] [ 0.089155] dump_stack_lvl+0x49/0x5f [ 0.089157] dump_stack+0x10/0x12 [ 0.089158] bad_page.cold+0x63/0x94 [ 0.089159] check_free_page_bad+0x66/0x70 [ 0.089160] __free_pages_ok+0x423/0x530 [ 0.089161] __free_pages_core+0x8e/0xa0 [ 0.089162] memblock_free_pages+0x10/0x12 [ 0.089164] memblock_free_late+0x8f/0xb9 [ 0.089165] kfence_init+0x68/0x92 [ 0.089166] start_kernel+0x789/0x992 [ 0.089167] x86_64_start_reservations+0x24/0x26 [ 0.089168] x86_64_start_kernel+0xa9/0xaf [ 0.089170] secondary_startup_64_no_verify+0xd5/0xdb [ 0.089171] Link: https://lkml.kernel.org/r/YnPG3pQrqfcgOlVa@hyeyoo Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Fixes: 8f0b36497303 ("mm: kfence: fix objcgs vector allocation") Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Marco Elver Reviewed-by: Muchun Song Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton [42.hyeyoo@gmail.com: backport - use struct page] Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Greg Kroah-Hartman --- mm/kfence/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 66076d8742b78..d25202766fbba 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -510,6 +510,7 @@ static bool __init kfence_init_pool(void) unsigned long addr = (unsigned long)__kfence_pool; struct page *pages; int i; + char *p; if (!__kfence_pool) return false; @@ -592,6 +593,16 @@ err: * fails for the first page, and therefore expect addr==__kfence_pool in * most failure cases. */ + for (p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) { + struct page *page = virt_to_page(p); + + if (!PageSlab(page)) + continue; +#ifdef CONFIG_MEMCG + page->memcg_data = 0; +#endif + __ClearPageSlab(page); + } memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)); __kfence_pool = NULL; return false; -- GitLab From 11e6a90ffd6294083b808d34ccc5a5ea18ed603e Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 16 May 2022 17:08:35 -0700 Subject: [PATCH 0018/4335] Revert "drm/i915/opregion: check port number bounds for SWSCI display power state" This reverts commit b84857c06ef9e72d09fadafdbb3ce9af64af954f. 5.10 stable contains 2 identical commits: 1. commit eb7bf11e8ef1 ("drm/i915/opregion: check port number bounds for SWSCI display power state") 2. commit b84857c06ef9 ("drm/i915/opregion: check port number bounds for SWSCI display power state") Both commits add separate checks for the same condition. Revert the 2nd redundant check to match upstream, which only has one check. Signed-off-by: Greg Thelen Signed-off-by: Yu Liao Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_opregion.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 08c20869d7e91..f7f49b69830fa 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -376,21 +376,6 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, return -EINVAL; } - /* - * The port numbering and mapping here is bizarre. The now-obsolete - * swsci spec supports ports numbered [0..4]. Port E is handled as a - * special case, but port F and beyond are not. The functionality is - * supposed to be obsolete for new platforms. Just bail out if the port - * number is out of bounds after mapping. - */ - if (port > 4) { - drm_dbg_kms(&dev_priv->drm, - "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", - intel_encoder->base.base.id, intel_encoder->base.name, - port_name(intel_encoder->port), port); - return -EINVAL; - } - if (!enable) parm |= 4 << 8; -- GitLab From 25ddeb6be3d94a112958c000136751cbca57015b Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 10 Dec 2021 17:09:51 +0100 Subject: [PATCH 0019/4335] rtc: fix use-after-free on device removal [ Upstream commit c8fa17d9f08a448184f03d352145099b5beb618e ] If the irqwork is still scheduled or running while the RTC device is removed, a use-after-free occurs in rtc_timer_do_work(). Cleanup the timerqueue and ensure the work is stopped to fix this. BUG: KASAN: use-after-free in mutex_lock+0x94/0x110 Write of size 8 at addr ffffff801d846338 by task kworker/3:1/41 Workqueue: events rtc_timer_do_work Call trace: mutex_lock+0x94/0x110 rtc_timer_do_work+0xec/0x630 process_one_work+0x5fc/0x1344 ... Allocated by task 551: kmem_cache_alloc_trace+0x384/0x6e0 devm_rtc_allocate_device+0xf0/0x574 devm_rtc_device_register+0x2c/0x12c ... Freed by task 572: kfree+0x114/0x4d0 rtc_device_release+0x64/0x80 device_release+0x8c/0x1f4 kobject_put+0x1c4/0x4b0 put_device+0x20/0x30 devm_rtc_release_device+0x1c/0x30 devm_action_release+0x54/0x90 release_nodes+0x124/0x310 devres_release_group+0x170/0x240 i2c_device_remove+0xd8/0x314 ... Last potentially related work creation: insert_work+0x5c/0x330 queue_work_on+0xcc/0x154 rtc_set_time+0x188/0x5bc rtc_dev_ioctl+0x2ac/0xbd0 ... Signed-off-by: Vincent Whitchurch Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210160951.7718-1-vincent.whitchurch@axis.com Signed-off-by: Sasha Levin --- drivers/rtc/class.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f77bc089eb6b7..0aef7df2ea704 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -26,6 +26,15 @@ struct class *rtc_class; static void rtc_device_release(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); + struct timerqueue_head *head = &rtc->timerqueue; + struct timerqueue_node *node; + + mutex_lock(&rtc->ops_lock); + while ((node = timerqueue_getnext(head))) + timerqueue_del(head, node); + mutex_unlock(&rtc->ops_lock); + + cancel_work_sync(&rtc->irqwork); ida_simple_remove(&rtc_ida, rtc->id); mutex_destroy(&rtc->ops_lock); -- GitLab From fe9f2f3a68fce6ab3fcedc3a316152061d0cd577 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 8 Feb 2022 11:29:07 -0500 Subject: [PATCH 0020/4335] rtc: pcf2127: fix bug when reading alarm registers [ Upstream commit 73ce05302007eece23a6acb7dc124c92a2209087 ] The first bug is that reading the 5 alarm registers results in a read operation of 20 bytes. The reason is because the destination buffer is defined as an array of "unsigned int", and we use the sizeof() operator on this array to define the bulk read count. The second bug is that the read value is invalid, because we are indexing the destination buffer as integers (4 bytes), instead of indexing it as u8. Changing the destination buffer type to u8 fixes both problems. Signed-off-by: Hugo Villeneuve Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220208162908.3182581-1-hugo@hugovil.com Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf2127.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 56c58b055dfff..43f8011070952 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -374,7 +374,8 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct pcf2127 *pcf2127 = dev_get_drvdata(dev); - unsigned int buf[5], ctrl2; + u8 buf[5]; + unsigned int ctrl2; int ret; ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); -- GitLab From 3ec283635457171afd8efee7a61d3678d57cdc85 Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 10 Feb 2022 11:43:53 +0800 Subject: [PATCH 0021/4335] um: Cleanup syscall_handler_t definition/cast, fix warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4f03f299a56ce4d73c5431e0327b3b6cb55ebb9 ] The syscall_handler_t type for x86_64 was defined as 'long (*)(void)', but always cast to 'long (*)(long, long, long, long, long, long)' before use. This now triggers a warning (see below). Define syscall_handler_t as the latter instead, and remove the cast. This simplifies the code, and fixes the warning. Warning: In file included from ../arch/um/include/asm/processor-generic.h:13 from ../arch/x86/um/asm/processor.h:41 from ../include/linux/rcupdate.h:30 from ../include/linux/rculist.h:11 from ../include/linux/pid.h:5 from ../include/linux/sched.h:14 from ../include/linux/ptrace.h:6 from ../arch/um/kernel/skas/syscall.c:7: ../arch/um/kernel/skas/syscall.c: In function ‘handle_syscall’: ../arch/x86/um/shared/sysdep/syscalls_64.h:18:11: warning: cast between incompatible function types from ‘long int (*)(void)’ to ‘long int (*)(long int, long int, long int, long int, long int, long int)’ [ -Wcast-function-type] 18 | (((long (*)(long, long, long, long, long, long)) \ | ^ ../arch/x86/um/asm/ptrace.h:36:62: note: in definition of macro ‘PT_REGS_SET_SYSCALL_RETURN’ 36 | #define PT_REGS_SET_SYSCALL_RETURN(r, res) (PT_REGS_AX(r) = (res)) | ^~~ ../arch/um/kernel/skas/syscall.c:46:33: note: in expansion of macro ‘EXECUTE_SYSCALL’ 46 | EXECUTE_SYSCALL(syscall, regs)); | ^~~~~~~~~~~~~~~ Signed-off-by: David Gow Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/x86/um/shared/sysdep/syscalls_64.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h index 8a7d5e1da98e5..1e6875b4ffd83 100644 --- a/arch/x86/um/shared/sysdep/syscalls_64.h +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -10,13 +10,12 @@ #include #include -typedef long syscall_handler_t(void); +typedef long syscall_handler_t(long, long, long, long, long, long); extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - (((long (*)(long, long, long, long, long, long)) \ - (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ UPT_SYSCALL_ARG2(®s->regs), \ UPT_SYSCALL_ARG3(®s->regs), \ UPT_SYSCALL_ARG4(®s->regs), \ -- GitLab From 0211383109832103cfddfd5c5cc99b29d40bb749 Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 20 Mar 2022 21:55:27 -0700 Subject: [PATCH 0022/4335] Input: add bounds checking to input_set_capability() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 409353cbe9fe48f6bc196114c442b1cff05a39bc ] Update input_set_capability() to prevent kernel panic in case the event code exceeds the bitmap for the given event type. Suggested-by: Tomasz Moń Signed-off-by: Jeff LaBundy Reviewed-by: Tomasz Moń Link: https://lore.kernel.org/r/20220320032537.545250-1-jeff@labundy.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/input.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index ccaeb24263854..ba246fabc6c17 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; +static const unsigned int input_max_code[EV_CNT] = { + [EV_KEY] = KEY_MAX, + [EV_REL] = REL_MAX, + [EV_ABS] = ABS_MAX, + [EV_MSC] = MSC_MAX, + [EV_SW] = SW_MAX, + [EV_LED] = LED_MAX, + [EV_SND] = SND_MAX, + [EV_FF] = FF_MAX, +}; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -2074,6 +2085,14 @@ EXPORT_SYMBOL(input_get_timestamp); */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { + if (type < EV_CNT && input_max_code[type] && + code > input_max_code[type]) { + pr_err("%s: invalid code %u for type %u\n", __func__, code, + type); + dump_stack(); + return; + } + switch (type) { case EV_KEY: __set_bit(code, dev->keybit); -- GitLab From a6ae0d0b7b6b374186db7044a441d0c022ca8fd6 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Sun, 20 Mar 2022 21:56:38 -0700 Subject: [PATCH 0023/4335] Input: stmfts - fix reference leak in stmfts_input_open [ Upstream commit 26623eea0da3476446909af96c980768df07bbd9 ] pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put_noidle will result in reference leak in stmfts_input_open, so we should fix it. Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20220317131604.53538-1-zhengyongjun3@huawei.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/stmfts.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index bc11203c9cf78..72e0b767e1ba4 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -339,11 +339,11 @@ static int stmfts_input_open(struct input_dev *dev) err = pm_runtime_get_sync(&sdata->client->dev); if (err < 0) - return err; + goto out; err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON); if (err) - return err; + goto out; mutex_lock(&sdata->mutex); sdata->running = true; @@ -366,7 +366,9 @@ static int stmfts_input_open(struct input_dev *dev) "failed to enable touchkey\n"); } - return 0; +out: + pm_runtime_put_noidle(&sdata->client->dev); + return err; } static void stmfts_input_close(struct input_dev *dev) -- GitLab From 3fbccc4082c1a919c799df466af24479fcc0e216 Mon Sep 17 00:00:00 2001 From: Monish Kumar R Date: Wed, 16 Mar 2022 13:24:49 +0530 Subject: [PATCH 0024/4335] nvme-pci: add quirks for Samsung X5 SSDs [ Upstream commit bc360b0b1611566e1bd47384daf49af6a1c51837 ] Add quirks to not fail the initialization and to have quick resume latency after cold/warm reboot. Signed-off-by: Monish Kumar R Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7695bdbde8d3..e6f55cf6e494a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3379,7 +3379,10 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - + { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; -- GitLab From 8b2ea9a3a159fcdad4b22513be84189fc0db1666 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2022 18:32:02 +0100 Subject: [PATCH 0025/4335] gfs2: Disable page faults during lockless buffered reads [ Upstream commit 52f3f033a5dbd023307520af1ff551cadfd7f037 ] During lockless buffered reads, filemap_read() holds page cache page references while trying to copy data to the user-space buffer. The calling process isn't holding the inode glock, but the page references it holds prevent those pages from being removed from the page cache, and that prevents the underlying inode glock from being moved to another node. Thus, we can end up in the same kinds of distributed deadlock situations as with normal (non-lockless) buffered reads. Fix that by disabling page faults during lockless reads as well. Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index eb5ea0262f3c4..60390f9dc31f4 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -963,14 +963,16 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; iocb->ki_flags &= ~IOCB_DIRECT; } + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); iocb->ki_flags &= ~IOCB_NOIO; + pagefault_enable(); if (ret >= 0) { if (!iov_iter_count(to)) return ret; written = ret; - } else { + } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; if (iocb->ki_flags & IOCB_NOWAIT) -- GitLab From 0a482fc4290b297d5ffb855c11026aa0b69f0409 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 Feb 2022 12:26:28 +0000 Subject: [PATCH 0026/4335] rtc: sun6i: Fix time overflow handling [ Upstream commit 9f6cd82eca7e91a0d0311242a87c6aa3c2737968 ] Using "unsigned long" for UNIX timestamps is never a good idea, and comparing the value of such a variable against U32_MAX does not do anything useful on 32-bit systems. Use the proper time64_t type when dealing with timestamps, and avoid cutting down the time range unnecessarily. This also fixes the flawed check for the alarm time being too far into the future. The check for this condition is actually somewhat theoretical, as the RTC counts till 2033 only anyways, and 2^32 seconds from now is not before the year 2157 - at which point I hope nobody will be using this hardware anymore. Signed-off-by: Andre Przywara Reviewed-by: Jernej Skrabec Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220211122643.1343315-4-andre.przywara@arm.com Signed-off-by: Sasha Levin --- drivers/rtc/rtc-sun6i.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index adec1b14a8deb..c551ebf0ac00f 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -138,7 +138,7 @@ struct sun6i_rtc_dev { const struct sun6i_rtc_clk_data *data; void __iomem *base; int irq; - unsigned long alarm; + time64_t alarm; struct clk_hw hw; struct clk_hw *int_osc; @@ -510,10 +510,8 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) struct sun6i_rtc_dev *chip = dev_get_drvdata(dev); struct rtc_time *alrm_tm = &wkalrm->time; struct rtc_time tm_now; - unsigned long time_now = 0; - unsigned long time_set = 0; - unsigned long time_gap = 0; - int ret = 0; + time64_t time_now, time_set; + int ret; ret = sun6i_rtc_gettime(dev, &tm_now); if (ret < 0) { @@ -528,9 +526,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) return -EINVAL; } - time_gap = time_set - time_now; - - if (time_gap > U32_MAX) { + if ((time_set - time_now) > U32_MAX) { dev_err(dev, "Date too far in the future\n"); return -EINVAL; } @@ -539,7 +535,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) writel(0, chip->base + SUN6I_ALRM_COUNTER); usleep_range(100, 300); - writel(time_gap, chip->base + SUN6I_ALRM_COUNTER); + writel(time_set - time_now, chip->base + SUN6I_ALRM_COUNTER); chip->alarm = time_set; sun6i_rtc_setaie(wkalrm->enabled, chip); -- GitLab From 625ff6f49789a971a88a275ab4c1587b445bf598 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 17 Mar 2022 13:16:13 +0000 Subject: [PATCH 0027/4335] crypto: stm32 - fix reference leak in stm32_crc_remove [ Upstream commit e9a36feecee0ee5845f2e0656f50f9942dd0bed3 ] pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put_noidle will result in reference leak in stm32_crc_remove, so we should fix it. Signed-off-by: Zheng Yongjun Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/stm32/stm32-crc32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index be1bf39a317de..90a920e7f6642 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -384,8 +384,10 @@ static int stm32_crc_remove(struct platform_device *pdev) struct stm32_crc *crc = platform_get_drvdata(pdev); int ret = pm_runtime_get_sync(crc->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(crc->dev); return ret; + } spin_lock(&crc_list.lock); list_del(&crc->list); -- GitLab From 4e640d4a9d14b552d52bf658d8ce6edcce1f62e4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Mar 2022 12:48:10 +0100 Subject: [PATCH 0028/4335] crypto: x86/chacha20 - Avoid spurious jumps to other functions [ Upstream commit 4327d168515fd8b5b92fa1efdf1d219fb6514460 ] The chacha_Nblock_xor_avx512vl() functions all have their own, identical, .LdoneN label, however in one particular spot {2,4} jump to the 8 version instead of their own. Resulting in: arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_2block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_4block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() Make each function consistently use its own done label. Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Martin Willi Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/x86/crypto/chacha-avx512vl-x86_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 946f74dd6fbaa..259383e1ad440 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -172,7 +172,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -438,7 +438,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 -- GitLab From 3e3f28529a52465bd7387d0c414d6f98de11610a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sat, 26 Mar 2022 00:05:00 +0800 Subject: [PATCH 0029/4335] ALSA: hda/realtek: Enable headset mic on Lenovo P360 [ Upstream commit 5a8738571747c1e275a40b69a608657603867b7e ] Lenovo P360 is another platform equipped with ALC897, and it needs ALC897_FIXUP_HEADSET_MIC_PIN quirk to make its headset mic work. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220325160501.705221-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 30295283512c3..b2e98f15a4567 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10946,6 +10946,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), -- GitLab From 2d5f611add95bc1a0da4d6db78c1ea59df4fc366 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 16 Mar 2022 19:13:20 +0100 Subject: [PATCH 0030/4335] s390/traps: improve panic message for translation-specification exception [ Upstream commit f09354ffd84eef3c88efa8ba6df05efe50cfd16a ] There are many different types of translation exceptions but only a translation-specification exception leads to a kernel panic since it indicates corrupted page tables, which must never happen. Improve the panic message so it is a bit more obvious what this is about. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 12d28ff5281fa..4044826d72ae5 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -142,10 +142,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void translation_exception(struct pt_regs *regs) +static void translation_specification_exception(struct pt_regs *regs) { /* May never happen. */ - panic("Translation exception"); + panic("Translation-Specification Exception"); } static void illegal_op(struct pt_regs *regs) @@ -374,7 +374,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = { [0x0f] = hfp_divide_exception, [0x10] = do_dat_exception, [0x11] = do_dat_exception, - [0x12] = translation_exception, + [0x12] = translation_specification_exception, [0x13] = special_op_exception, [0x14] = default_trap_handler, [0x15] = operand_exception, -- GitLab From 4e32c4c7016eeb2186ffa4ebcbadbddc4b071234 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 20 Sep 2021 09:32:21 +0200 Subject: [PATCH 0031/4335] s390/pci: improve zpci_dev reference counting [ Upstream commit c122383d221dfa2f41cfe5e672540595de986fde ] Currently zpci_dev uses kref based reference counting but only accounts for one original reference plus one reference from an added pci_dev to its underlying zpci_dev. Counting just the original reference worked until the pci_dev reference was added in commit 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") because once a zpci_dev goes away, i.e. enters the reserved state, it would immediately get released. However with the pci_dev reference this is no longer the case and the zpci_dev may still appear in multiple availability events indicating that it was reserved. This was solved by detecting when the zpci_dev is already on its way out but still hanging around. This has however shown some light on how unusual our zpci_dev reference counting is. Improve upon this by modelling zpci_dev reference counting on pci_dev. Analogous to pci_get_slot() increment the reference count in get_zdev_by_fid(). Thus all users of get_zdev_by_fid() must drop the reference once they are done with the zpci_dev. Similar to pci_scan_single_device(), zpci_create_device() returns the device with an initial count of 1 and the device added to the zpci_list (analogous to the PCI bus' device_list). In turn users of zpci_create_device() must only drop the reference once the device is gone from the point of view of the zPCI subsystem, it might still be referenced by the common PCI subsystem though. Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/pci/pci.c | 1 + arch/s390/pci/pci_bus.h | 3 ++- arch/s390/pci/pci_clp.c | 9 +++++++-- arch/s390/pci/pci_event.c | 7 ++++++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index b833155ce8381..639924d983315 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; + zpci_zdev_get(zdev); break; } } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e359d2686178b..ecef3a9e16c00 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); static inline void zpci_zdev_put(struct zpci_dev *zdev) { - kref_put(&zdev->kref, zpci_release_device); + if (zdev) + kref_put(&zdev->kref, zpci_release_device); } static inline void zpci_zdev_get(struct zpci_dev *zdev) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index be077b39da336..5011d27461fd3 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -22,6 +22,8 @@ #include #include +#include "pci_bus.h" + bool zpci_unique_uid; void update_uid_checking(bool new) @@ -403,8 +405,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; zdev = get_zdev_by_fid(entry->fid); - if (!zdev) - zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (zdev) { + zpci_zdev_put(zdev); + return; + } + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 5b8d647523f96..6d57625b8ed99 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -62,10 +62,12 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); if (!pdev) - return; + goto no_pdev; pdev->error_state = pci_channel_io_perm_failure; pci_dev_put(pdev); +no_pdev: + zpci_zdev_put(zdev); } void zpci_event_error(void *data) @@ -94,6 +96,7 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + bool existing_zdev = !!zdev; enum zpci_state state; zpci_err("avail CCDF:\n"); @@ -156,6 +159,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (existing_zdev) + zpci_zdev_put(zdev); } void zpci_event_availability(void *data) -- GitLab From 87c54a0b64752c09f4e4e13e4959d66495fba227 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:25 +0800 Subject: [PATCH 0032/4335] vhost_vdpa: don't setup irq offloading when irq_num < 0 [ Upstream commit cce0ab2b2a39072d81f98017f7b076f3410ef740 ] When irq number is negative(e.g., -EINVAL), the virtqueue may be disabled or the virtqueues are sharing a device irq. In such case, we should not setup irq offloading for a virtqueue. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-3-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index d62f05d056b7b..299a995326185 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -97,8 +97,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) return; irq = ops->get_vq_irq(vdpa, qid); + if (irq < 0) + return; + irq_bypass_unregister_producer(&vq->call_ctx.producer); - if (!vq->call_ctx.ctx || irq < 0) + if (!vq->call_ctx.ctx) return; vq->call_ctx.producer.token = vq->call_ctx.ctx; -- GitLab From b6f7efb9221ba5c6e66e3dba74b5c34c31ac81b6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 20 Mar 2022 07:02:14 -0400 Subject: [PATCH 0033/4335] tools/virtio: compile with -pthread [ Upstream commit f03560a57c1f60db6ac23ffd9714e1c69e2f95c7 ] When using pthreads, one has to compile and link with -lpthread, otherwise e.g. glibc is not guaranteed to be reentrant. This replaces -lpthread. Reported-by: Matthew Wilcox Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- tools/virtio/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 0d7bbe49359d8..1b25cc7c64bbd 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -5,7 +5,8 @@ virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -LDFLAGS += -lpthread +CFLAGS += -pthread +LDFLAGS += -pthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} -- GitLab From d44ff3b100b94e9f23b1e8dbe688eee9bb867ac9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Mar 2022 13:57:27 +0200 Subject: [PATCH 0034/4335] nvmet: use a private workqueue instead of the system workqueue [ Upstream commit 8832cf922151e9dfa2821736beb0ae2dd3968b6e ] Any attempt to flush kernel-global WQs has possibility of deadlock so we should simply stop using them, instead introduce nvmet_wq which is the generic nvmet workqueue for work elements that don't explicitly require a dedicated workqueue (by the mere fact that they are using the system_wq). Changes were done using the following replaces: - s/schedule_work(/queue_work(nvmet_wq, /g - s/schedule_delayed_work(/queue_delayed_work(nvmet_wq, /g - s/flush_scheduled_work()/flush_workqueue(nvmet_wq)/g Reported-by: Tetsuo Handa Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/configfs.c | 2 +- drivers/nvme/target/core.c | 24 ++++++++++++++++++------ drivers/nvme/target/fc.c | 8 ++++---- drivers/nvme/target/fcloop.c | 16 ++++++++-------- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/loop.c | 4 ++-- drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/passthru.c | 2 +- drivers/nvme/target/rdma.c | 12 ++++++------ drivers/nvme/target/tcp.c | 10 +++++----- 11 files changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index aa6d84d8848e7..52bb262d267ac 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -978,7 +978,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } void nvmet_execute_keep_alive(struct nvmet_req *req) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 496d775c67707..cea30e4f50533 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1554,7 +1554,7 @@ static void nvmet_port_release(struct config_item *item) struct nvmet_port *port = to_nvmet_port(item); /* Let inflight controllers teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); list_del(&port->global_entry); kfree(port->ana_state); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b8425fa34300f..a8dafe8670f20 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -20,6 +20,9 @@ struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); +struct workqueue_struct *nvmet_wq; +EXPORT_SYMBOL_GPL(nvmet_wq); + /* * This read/write semaphore is used to synchronize access to configuration * information on a target system that will result in discovery log page @@ -205,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) @@ -385,7 +388,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) if (reset_tbkas) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); return; } @@ -403,7 +406,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); } void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) @@ -1477,7 +1480,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) mutex_lock(&ctrl->lock); if (!(ctrl->csts & NVME_CSTS_CFS)) { ctrl->csts |= NVME_CSTS_CFS; - schedule_work(&ctrl->fatal_err_work); + queue_work(nvmet_wq, &ctrl->fatal_err_work); } mutex_unlock(&ctrl->lock); } @@ -1617,9 +1620,15 @@ static int __init nvmet_init(void) goto out_free_zbd_work_queue; } + nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); + if (!nvmet_wq) { + error = -ENOMEM; + goto out_free_buffered_work_queue; + } + error = nvmet_init_discovery(); if (error) - goto out_free_work_queue; + goto out_free_nvmet_work_queue; error = nvmet_init_configfs(); if (error) @@ -1628,7 +1637,9 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); -out_free_work_queue: +out_free_nvmet_work_queue: + destroy_workqueue(nvmet_wq); +out_free_buffered_work_queue: destroy_workqueue(buffered_io_wq); out_free_zbd_work_queue: destroy_workqueue(zbd_wq); @@ -1640,6 +1651,7 @@ static void __exit nvmet_exit(void) nvmet_exit_configfs(); nvmet_exit_discovery(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 22b5108168a6a..c43bc5e1c7a28 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1546,7 +1546,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1592,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); return; @@ -2060,7 +2060,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->rqstdatalen = lsreqbuf_len; iod->hosthandle = hosthandle; - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return 0; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 54606f1872b4a..5c16372f3b533 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -360,7 +360,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); return ret; } @@ -393,7 +393,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); } return 0; @@ -448,7 +448,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); return ret; } @@ -480,7 +480,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); } return 0; @@ -520,7 +520,7 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) tgt_rscn->tport = tgtport->private; INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work); - schedule_work(&tgt_rscn->work); + queue_work(nvmet_wq, &tgt_rscn->work); } static void @@ -739,7 +739,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); kref_init(&tfcp_req->ref); - schedule_work(&tfcp_req->fcp_rcv_work); + queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); return 0; } @@ -921,7 +921,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - schedule_work(&tfcp_req->tio_done_work); + queue_work(nvmet_wq, &tfcp_req->tio_done_work); } static void @@ -976,7 +976,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, if (abortio) /* leave the reference while the work item is scheduled */ - WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + WARN_ON(!queue_work(nvmet_wq, &tfcp_req->abort_rcv_work)); else { /* * as the io has already had the done callback made, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index df7e033dd2732..228871d48106b 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -292,7 +292,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_execute_discard(struct nvmet_req *req) @@ -352,7 +352,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_write_zeroes_work(struct work_struct *w) @@ -382,7 +382,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 0285ccc7541f6..2553f487c9f24 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -166,7 +166,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.transfer_len = blk_rq_payload_bytes(req); } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return BLK_STS_OK; } @@ -187,7 +187,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) return; } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); } static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7143c7fa74641..dbeb0b8c11947 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -365,6 +365,7 @@ struct nvmet_req { extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; +extern struct workqueue_struct *nvmet_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index f0efb35379898..6220e1dd961ad 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -281,7 +281,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) if (req->p.use_workqueue || effects) { INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); req->p.rq = rq; - schedule_work(&req->p.work); + queue_work(nvmet_wq, &req->p.work); } else { rq->end_io_data = req; blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0, diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f1eedbf493d5b..18e082091c821 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1583,7 +1583,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); @@ -1668,7 +1668,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } } @@ -1698,7 +1698,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } /** @@ -1772,7 +1772,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, if (!queue) { struct nvmet_rdma_port *port = cm_id->context; - schedule_delayed_work(&port->repair_work, 0); + queue_delayed_work(nvmet_wq, &port->repair_work, 0); break; } fallthrough; @@ -1902,7 +1902,7 @@ static void nvmet_rdma_repair_port_work(struct work_struct *w) nvmet_rdma_disable_port(port); ret = nvmet_rdma_enable_port(port); if (ret) - schedule_delayed_work(&port->repair_work, 5 * HZ); + queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ); } static int nvmet_rdma_add_port(struct nvmet_port *nport) @@ -2046,7 +2046,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data } mutex_unlock(&nvmet_rdma_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } static struct ib_client nvmet_rdma_ib_client = { diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 2b8bab28417b8..f592e5f7f5f3d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1251,7 +1251,7 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) spin_lock(&queue->state_lock); if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } spin_unlock(&queue->state_lock); } @@ -1662,7 +1662,7 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk) goto out; if (sk->sk_state == TCP_LISTEN) - schedule_work(&port->accept_work); + queue_work(nvmet_wq, &port->accept_work); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1793,7 +1793,7 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) if (sq->qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } queue->nr_cmds = sq->size * 2; @@ -1854,12 +1854,12 @@ static void __exit nvmet_tcp_exit(void) nvmet_unregister_transport(&nvmet_tcp_ops); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); mutex_lock(&nvmet_tcp_queue_mutex); list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) kernel_sock_shutdown(queue->sock, SHUT_RDWR); mutex_unlock(&nvmet_tcp_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); } -- GitLab From 9690e989d2637a35bbec5fd9e70ee15b65833d70 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Thu, 24 Mar 2022 13:05:11 -0600 Subject: [PATCH 0035/4335] nvme-multipath: fix hang when disk goes live over reconnect [ Upstream commit a4a6f3c8f61c3cfbda4998ad94596059ad7e4332 ] nvme_mpath_init_identify() invoked from nvme_init_identify() fetches a fresh ANA log from the ctrl. This is essential to have an up to date path states for both existing namespaces and for those scan_work may discover once the ctrl is up. This happens in the following cases: 1) A new ctrl is being connected. 2) An existing ctrl is successfully reconnected. 3) An existing ctrl is being reset. While in (1) ctrl->namespaces is empty, (2 & 3) may have namespaces, and nvme_read_ana_log() may call nvme_update_ns_ana_state(). This result in a hang when the ANA state of an existing namespace changes and makes the disk live: nvme_mpath_set_live() issues IO to the namespace through the ctrl, which does NOT have IO queues yet. See sample hang below. Solution: - nvme_update_ns_ana_state() to call set_live only if ctrl is live - nvme_read_ana_log() call from nvme_mpath_init_identify() therefore only fetches and parses the ANA log; any erros in this process will fail the ctrl setup as appropriate; - a separate function nvme_mpath_update() is called in nvme_start_ctrl(); this parses the ANA log without fetching it. At this point the ctrl is live, therefore, disks can be set live normally. Sample failure: nvme nvme0: starting error recovery nvme nvme0: Reconnecting in 10 seconds... block nvme0n6: no usable path - requeuing I/O INFO: task kworker/u8:3:312 blocked for more than 122 seconds. Tainted: G E 5.14.5-1.el7.elrepo.x86_64 #1 Workqueue: nvme-wq nvme_tcp_reconnect_ctrl_work [nvme_tcp] Call Trace: __schedule+0x2a2/0x7e0 schedule+0x4e/0xb0 io_schedule+0x16/0x40 wait_on_page_bit_common+0x15c/0x3e0 do_read_cache_page+0x1e0/0x410 read_cache_page+0x12/0x20 read_part_sector+0x46/0x100 read_lba+0x121/0x240 efi_partition+0x1d2/0x6a0 bdev_disk_changed.part.0+0x1df/0x430 bdev_disk_changed+0x18/0x20 blkdev_get_whole+0x77/0xe0 blkdev_get_by_dev+0xd2/0x3a0 __device_add_disk+0x1ed/0x310 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x138/0x1b0 [nvme_core] nvme_update_ns_ana_state+0x2b/0x30 [nvme_core] nvme_update_ana_state+0xca/0xe0 [nvme_core] nvme_parse_ana_log+0xac/0x170 [nvme_core] nvme_read_ana_log+0x7d/0xe0 [nvme_core] nvme_mpath_init_identify+0x105/0x150 [nvme_core] nvme_init_identify+0x2df/0x4d0 [nvme_core] nvme_init_ctrl_finish+0x8d/0x3b0 [nvme_core] nvme_tcp_setup_ctrl+0x337/0x390 [nvme_tcp] nvme_tcp_reconnect_ctrl_work+0x24/0x40 [nvme_tcp] process_one_work+0x1bd/0x360 worker_thread+0x50/0x3d0 Signed-off-by: Anton Eidelman Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/multipath.c | 25 +++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f2bb576157625..87877397d1add 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4358,6 +4358,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } } EXPORT_SYMBOL_GPL(nvme_start_ctrl); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e9301b51db763..064acad505d38 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -574,8 +574,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -662,6 +671,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f1e5c7564cae6..72bcd7e5716e5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -776,6 +776,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -850,6 +851,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } -- GitLab From 258a7a7fb568c4ae5c59a445f5ea0b7da35bb552 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 11 Jan 2022 16:57:50 -0600 Subject: [PATCH 0036/4335] rtc: mc146818-lib: Fix the AltCentury for AMD platforms [ Upstream commit 3ae8fd41573af4fb3a490c9ed947fc936ba87190 ] Setting the century forward has been failing on AMD platforms. There was a previous attempt at fixing this for family 0x17 as part of commit 7ad295d5196a ("rtc: Fix the AltCentury value on AMD/Hygon platform") but this was later reverted due to some problems reported that appeared to stem from an FW bug on a family 0x17 desktop system. The same comments mentioned in the previous commit continue to apply to the newer platforms as well. ``` MC146818 driver use function mc146818_set_time() to set register RTC_FREQ_SELECT(RTC_REG_A)'s bit4-bit6 field which means divider stage reset value on Intel platform to 0x7. While AMD/Hygon RTC_REG_A(0Ah)'s bit4 is defined as DV0 [Reference]: DV0 = 0 selects Bank 0, DV0 = 1 selects Bank 1. Bit5-bit6 is defined as reserved. DV0 is set to 1, it will select Bank 1, which will disable AltCentury register(0x32) access. As UEFI pass acpi_gbl_FADT.century 0x32 (AltCentury), the CMOS write will be failed on code: CMOS_WRITE(century, acpi_gbl_FADT.century). Correct RTC_REG_A bank select bit(DV0) to 0 on AMD/Hygon CPUs, it will enable AltCentury(0x32) register writing and finally setup century as expected. ``` However in closer examination the change previously submitted was also modifying bits 5 & 6 which are declared reserved in the AMD documentation. So instead modify just the DV0 bank selection bit. Being cognizant that there was a failure reported before, split the code change out to a static function that can also be used for exclusions if any regressions such as Mikhail's pop up again. Cc: Jinke Fan Cc: Mikhail Gavrilov Link: https://lore.kernel.org/all/CABXGCsMLob0DC25JS8wwAYydnDoHBSoMh2_YLPfqm3TTvDE-Zw@mail.gmail.com/ Link: https://www.amd.com/system/files/TechDocs/51192_Bolton_FCH_RRG.pdf Signed-off-by: Raul E Rangel Signed-off-by: Mario Limonciello Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220111225750.1699-1-mario.limonciello@amd.com Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 16 +++++++++++++++- include/linux/mc146818rtc.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 8abac672f3cbd..f3f5a87fe376e 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -146,6 +146,17 @@ again: } EXPORT_SYMBOL_GPL(mc146818_get_time); +/* AMD systems don't allow access to AltCentury with DV1 */ +static bool apply_amd_register_a_behavior(void) +{ +#ifdef CONFIG_X86 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return true; +#endif + return false; +} + /* Set the current date and time in the real time clock. */ int mc146818_set_time(struct rtc_time *time) { @@ -219,7 +230,10 @@ int mc146818_set_time(struct rtc_time *time) save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + if (apply_amd_register_a_behavior()) + CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); + else + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 37c74e25f53dd..3038124c61154 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -86,6 +86,8 @@ struct cmos_rtc_board_info { /* 2 values for divider stage reset, others for "testing purposes only" */ # define RTC_DIV_RESET1 0x60 # define RTC_DIV_RESET2 0x70 + /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ +# define RTC_AMD_BANK_SELECT 0x10 /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ # define RTC_RATE_SELECT 0x0F -- GitLab From 9a865748658bd38f68df086214d124b170caec3b Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Wed, 30 Mar 2022 09:49:28 -0700 Subject: [PATCH 0037/4335] fs: fix an infinite loop in iomap_fiemap [ Upstream commit 49df34221804cfd6384135b28b03c9461a31d024 ] when get fiemap starting from MAX_LFS_FILESIZE, (maxbytes - *len) < start will always true , then *len set zero. because of start offset is beyond file size, for erofs filesystem it will always return iomap.length with zero,iomap iterate will enter infinite loop. it is necessary cover this corner case to avoid this situation. ------------[ cut here ]------------ WARNING: CPU: 7 PID: 905 at fs/iomap/iter.c:35 iomap_iter+0x97f/0xc70 Modules linked in: xfs erofs CPU: 7 PID: 905 Comm: iomap Tainted: G W 5.17.0-rc8 #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:iomap_iter+0x97f/0xc70 Code: 85 a1 fc ff ff e8 71 be 9c ff 0f 1f 44 00 00 e9 92 fc ff ff e8 62 be 9c ff 0f 0b b8 fb ff ff ff e9 fc f8 ff ff e8 51 be 9c ff <0f> 0b e9 2b fc ff ff e8 45 be 9c ff 0f 0b e9 e1 fb ff ff e8 39 be RSP: 0018:ffff888060a37ab0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888060a37bb0 RCX: 0000000000000000 RDX: ffff88807e19a900 RSI: ffffffff81a7da7f RDI: ffff888060a37be0 RBP: 7fffffffffffffff R08: 0000000000000000 R09: ffff888060a37c20 R10: ffff888060a37c67 R11: ffffed100c146f8c R12: 7fffffffffffffff R13: 0000000000000000 R14: ffff888060a37bd8 R15: ffff888060a37c20 FS: 00007fd3cca01540(0000) GS:ffff888108780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010820 CR3: 0000000054b92000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: iomap_fiemap+0x1c9/0x2f0 erofs_fiemap+0x64/0x90 [erofs] do_vfs_ioctl+0x40d/0x12e0 __x64_sys_ioctl+0xaa/0x1c0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae ---[ end trace 0000000000000000 ]--- watchdog: BUG: soft lockup - CPU#7 stuck for 26s! [iomap:905] Reported-by: Hulk Robot Signed-off-by: Guo Xuenan Reviewed-by: Christoph Hellwig [djwong: fix some typos] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 504e695781124..e0a3455f9a0f6 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -173,7 +173,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, if (*len == 0) return -EINVAL; - if (start > maxbytes) + if (start >= maxbytes) return -EFBIG; /* -- GitLab From 98e0a2e96a788ad3db435bf86a7413b7388b50b8 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Fri, 25 Mar 2022 19:49:41 +0800 Subject: [PATCH 0038/4335] MIPS: lantiq: check the return value of kzalloc() [ Upstream commit 34123208bbcc8c884a0489f543a23fe9eebb5514 ] kzalloc() is a memory allocation function which can return NULL when some internal memory errors happen. So it is better to check the return value of it to prevent potential wrong memory access or memory leak. Signed-off-by: Xiaoke Wang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/lantiq/falcon/sysctrl.c | 2 ++ arch/mips/lantiq/xway/gptu.c | 2 ++ arch/mips/lantiq/xway/sysctrl.c | 46 ++++++++++++++++++++----------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 42222f849bd25..446a2536999bf 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1e..200fe9ff641d6 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 917fac1636b71..084f6caba5f23 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -315,6 +315,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -338,6 +340,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -356,24 +360,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -393,9 +401,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; -- GitLab From 1848108927ce2bbb6cf12823991ca22298841559 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 1 Apr 2022 00:03:48 +0200 Subject: [PATCH 0039/4335] drbd: remove usage of list iterator variable after loop [ Upstream commit 901aeda62efa21f2eae937bccb71b49ae531be06 ] In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to iterate through the list [1]. Since that variable should not be used past the loop iteration, a separate variable is used to 'remember the current location within the loop'. To either continue iterating from that position or skip the iteration (if the previous iteration was complete) list_prepare_entry() is used. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Link: https://lore.kernel.org/r/20220331220349.885126-1-jakobkoschel@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 548e0dd535284..6db0333b5b7a1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; - struct drbd_request *req = NULL; + struct drbd_request *req = NULL, *tmp = NULL; int expect_epoch = 0; int expect_size = 0; @@ -225,8 +225,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ list_for_each_entry(req, &connection->transfer_log, tl_requests) - if (req->epoch == expect_epoch) + if (req->epoch == expect_epoch) { + tmp = req; break; + } + req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; -- GitLab From 6d32c58b267a7d628c696e9d76c605b84af77f21 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 9 Feb 2022 13:11:30 +0800 Subject: [PATCH 0040/4335] platform/chrome: cros_ec_debugfs: detach log reader wq from devm [ Upstream commit 0e8eb5e8acbad19ac2e1856b2fb2320184299b33 ] Debugfs console_log uses devm memory (e.g. debug_info in cros_ec_console_log_poll()). However, lifecycles of device and debugfs are independent. An use-after-free issue is observed if userland program operates the debugfs after the memory has been freed. The call trace: do_raw_spin_lock _raw_spin_lock_irqsave remove_wait_queue ep_unregister_pollwait ep_remove do_epoll_ctl A Python example to reproduce the issue: ... import select ... p = select.epoll() ... f = open('/sys/kernel/debug/cros_scp/console_log') ... p.register(f, select.POLLIN) ... p.poll(1) [(4, 1)] # 4=fd, 1=select.POLLIN [ shutdown cros_scp at the point ] ... p.poll(1) [(4, 16)] # 4=fd, 16=select.POLLHUP ... p.unregister(f) An use-after-free issue raises here. It called epoll_ctl with EPOLL_CTL_DEL which in turn to use the workqueue in the devm (i.e. log_wq). Detaches log reader's workqueue from devm to make sure it is persistent even if the device has been removed. Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220209051130.386175-1-tzungbi@google.com Signed-off-by: Benson Leung Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 272c89837d745..0dbceee87a4b1 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -25,6 +25,9 @@ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1)) +/* waitqueue for log readers */ +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq); + /** * struct cros_ec_debugfs - EC debugging information. * @@ -33,7 +36,6 @@ * @log_buffer: circular buffer for console log information * @read_msg: preallocated EC command and buffer to read console log * @log_mutex: mutex to protect circular buffer - * @log_wq: waitqueue for log readers * @log_poll_work: recurring task to poll EC for new console log data * @panicinfo_blob: panicinfo debugfs blob */ @@ -44,7 +46,6 @@ struct cros_ec_debugfs { struct circ_buf log_buffer; struct cros_ec_command *read_msg; struct mutex log_mutex; - wait_queue_head_t log_wq; struct delayed_work log_poll_work; /* EC panicinfo */ struct debugfs_blob_wrapper panicinfo_blob; @@ -107,7 +108,7 @@ static void cros_ec_console_log_work(struct work_struct *__work) buf_space--; } - wake_up(&debug_info->log_wq); + wake_up(&cros_ec_debugfs_log_wq); } mutex_unlock(&debug_info->log_mutex); @@ -141,7 +142,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf, mutex_unlock(&debug_info->log_mutex); - ret = wait_event_interruptible(debug_info->log_wq, + ret = wait_event_interruptible(cros_ec_debugfs_log_wq, CIRC_CNT(cb->head, cb->tail, LOG_SIZE)); if (ret < 0) return ret; @@ -173,7 +174,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file, struct cros_ec_debugfs *debug_info = file->private_data; __poll_t mask = 0; - poll_wait(file, &debug_info->log_wq, wait); + poll_wait(file, &cros_ec_debugfs_log_wq, wait); mutex_lock(&debug_info->log_mutex); if (CIRC_CNT(debug_info->log_buffer.head, @@ -377,7 +378,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info) debug_info->log_buffer.tail = 0; mutex_init(&debug_info->log_mutex); - init_waitqueue_head(&debug_info->log_wq); debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir, debug_info, &cros_ec_console_log_fops); -- GitLab From 2a8b2f6a91d1fe0d343b1d0b5fb9df62fc9e2552 Mon Sep 17 00:00:00 2001 From: linyujun Date: Fri, 1 Apr 2022 10:52:47 +0100 Subject: [PATCH 0041/4335] ARM: 9191/1: arm/stacktrace, kasan: Silence KASAN warnings in unwind_frame() [ Upstream commit 9be4c88bb7924f68f88cfd47d925c2d046f51a73 ] The following KASAN warning is detected by QEMU. ================================================================== BUG: KASAN: stack-out-of-bounds in unwind_frame+0x508/0x870 Read of size 4 at addr c36bba90 by task cat/163 CPU: 1 PID: 163 Comm: cat Not tainted 5.10.0-rc1 #40 Hardware name: ARM-Versatile Express [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xb0) [] (dump_stack) from [] (print_address_description.constprop.0+0x58/0x4bc) [] (print_address_description.constprop.0) from [] (kasan_report+0x154/0x170) [] (kasan_report) from [] (unwind_frame+0x508/0x870) [] (unwind_frame) from [] (__save_stack_trace+0x110/0x134) [] (__save_stack_trace) from [] (stack_trace_save+0x8c/0xb4) [] (stack_trace_save) from [] (kasan_set_track+0x38/0x60) [] (kasan_set_track) from [] (kasan_set_free_info+0x20/0x2c) [] (kasan_set_free_info) from [] (__kasan_slab_free+0xec/0x120) [] (__kasan_slab_free) from [] (kmem_cache_free+0x7c/0x334) [] (kmem_cache_free) from [] (rcu_core+0x390/0xccc) [] (rcu_core) from [] (__do_softirq+0x180/0x518) [] (__do_softirq) from [] (irq_exit+0x9c/0xe0) [] (irq_exit) from [] (__handle_domain_irq+0xb0/0x110) [] (__handle_domain_irq) from [] (gic_handle_irq+0xa0/0xb8) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x94) Exception stack(0xc36bb928 to 0xc36bb970) b920: c36bb9c0 00000000 c0126919 c0101228 c36bb9c0 b76d7730 b940: c36b8000 c36bb9a0 c3335b00 c01ce0d8 00000003 c36bba3c c36bb940 c36bb978 b960: c010e298 c011373c 60000013 ffffffff [] (__irq_svc) from [] (unwind_frame+0x0/0x870) [] (unwind_frame) from [<00000000>] (0x0) The buggy address belongs to the page: page:(ptrval) refcount:0 mapcount:0 mapping:00000000 index:0x0 pfn:0x636bb flags: 0x0() raw: 00000000 00000000 ef867764 00000000 00000000 00000000 ffffffff 00000000 page dumped because: kasan: bad access detected addr c36bba90 is located in stack of task cat/163 at offset 48 in frame: stack_trace_save+0x0/0xb4 this frame has 1 object: [32, 48) 'trace' Memory state around the buggy address: c36bb980: f1 f1 f1 f1 00 04 f2 f2 00 00 f3 f3 00 00 00 00 c36bba00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 >c36bba80: 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ^ c36bbb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c36bbb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== There is a same issue on x86 and has been resolved by the commit f7d27c35ddff ("x86/mm, kasan: Silence KASAN warnings in get_wchan()"). The solution could be applied to arm architecture too. Signed-off-by: Lin Yujun Reported-by: He Ying Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/kernel/stacktrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index db798eac74315..8247749998259 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -53,17 +53,17 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif return 0; -- GitLab From d05cc5395e36711edad8bdef6945f138d8a7097b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 1 Apr 2022 11:28:18 -0700 Subject: [PATCH 0042/4335] nilfs2: fix lockdep warnings in page operations for btree nodes [ Upstream commit e897be17a441fa637cd166fc3de1445131e57692 ] Patch series "nilfs2 lockdep warning fixes". The first two are to resolve the lockdep warning issue, and the last one is the accompanying cleanup and low priority. Based on your comment, this series solves the issue by separating inode object as needed. Since I was worried about the impact of the object composition changes, I tested the series carefully not to cause regressions especially for delicate functions such like disk space reclamation and snapshots. This patch (of 3): If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at inode_to_wb() during page/folio operations for btree nodes: WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 Modules linked in: ... RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline] RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline] RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 ... Call Trace: __set_page_dirty include/linux/pagemap.h:834 [inline] mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145 nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline] nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009 nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline] nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036 nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline] nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 This is because nilfs2 uses two page caches for each inode and inode->i_mapping never points to one of them, the btree node cache. This causes inode_to_wb(inode) to refer to a different page cache than the caller page/folio operations such like __folio_start_writeback(), __folio_end_writeback(), or __folio_mark_dirty() acquired the lock. This patch resolves the issue by allocating and using an additional inode to hold the page cache of btree nodes. The inode is attached one-to-one to the traditional nilfs2 inode if it requires a block mapping with b-tree. This setup change is in memory only and does not affect the disk format. Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com Reported-by: syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com Reported-by: Hao Sun Reported-by: David Hildenbrand Tested-by: Ryusuke Konishi Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/btnode.c | 23 ++++++++-- fs/nilfs2/btnode.h | 1 + fs/nilfs2/btree.c | 27 ++++++++---- fs/nilfs2/gcinode.c | 7 +-- fs/nilfs2/inode.c | 104 ++++++++++++++++++++++++++++++++++++++------ fs/nilfs2/mdt.c | 7 +-- fs/nilfs2/nilfs.h | 14 +++--- fs/nilfs2/page.c | 7 ++- fs/nilfs2/segment.c | 9 ++-- fs/nilfs2/super.c | 5 +-- 10 files changed, 154 insertions(+), 50 deletions(-) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4391fd3abd8f8..e00e184b12615 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -20,6 +20,23 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct page *page; int err; @@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 0f88dbc9bcb3e..05ab64d354dc9 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; +void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index ab9ec073330f1..2301b57ca17ff 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); @@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; @@ -1741,6 +1743,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, dat = nilfs_bmap_get_dat(btree); } + ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); + if (ret < 0) + return ret; + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1913,7 +1919,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1939,7 +1945,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } @@ -1958,7 +1964,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } @@ -2134,7 +2140,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -2188,12 +2195,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } @@ -2398,6 +2405,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; + else + ret = nilfs_attach_btree_node_cache( + &NILFS_BMAP_I(bmap)->vfs_inode); + return ret; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 4483204968568..aadea660c66c9 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { + struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, REQ_OP_READ, 0, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ @@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - return 0; + return nilfs_attach_btree_node_cache(inode); } /** @@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2e8eb263cf0f6..7b2bc41733131 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -29,12 +29,14 @@ * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag + * @for_btnc: inode for B-tree node cache flag */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - int for_gc; + bool for_gc; + bool for_btnc; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -314,7 +316,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -527,6 +530,13 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); + if (test_bit(NILFS_I_BTNC, &ii->i_state)) { + if (!args->for_btnc) + return 0; + } else if (args->for_btnc) { + return 0; + } + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -538,15 +548,15 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; - if (args->for_gc) { + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = args->root; + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + + if (args->for_gc) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - NILFS_I(inode)->i_cno = args->cno; - NILFS_I(inode)->i_root = NULL; - } else { - if (args->root && args->ino == NILFS_ROOT_INO) - nilfs_get_root(args->root); - NILFS_I(inode)->i_root = args->root; - } + if (args->for_btnc) + NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); return 0; } @@ -554,7 +564,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -564,7 +575,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -595,7 +607,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + .ino = ino, .root = NULL, .cno = cno, .for_gc = true, + .for_btnc = false }; struct inode *inode; int err; @@ -615,6 +628,68 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, return inode; } +/** + * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode + * @inode: inode object + * + * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, + * or does nothing if the inode already has it. This function allocates + * an additional inode to maintain page cache of B-tree nodes one-on-one. + * + * Return Value: On success, 0 is returned. On errors, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode; + struct nilfs_iget_args args; + + if (ii->i_assoc_inode) + return 0; + + args.ino = inode->i_ino; + args.root = ii->i_root; + args.cno = ii->i_cno; + args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; + args.for_btnc = true; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!btnc_inode)) + return -ENOMEM; + if (btnc_inode->i_state & I_NEW) { + nilfs_init_btnc_inode(btnc_inode); + unlock_new_inode(btnc_inode); + } + NILFS_I(btnc_inode)->i_assoc_inode = inode; + NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; + ii->i_assoc_inode = btnc_inode; + + return 0; +} + +/** + * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode + * @inode: inode object + * + * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its + * holder inode bound to @inode, or does nothing if @inode doesn't have it. + */ +void nilfs_detach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode = ii->i_assoc_inode; + + if (btnc_inode) { + NILFS_I(btnc_inode)->i_assoc_inode = NULL; + ii->i_assoc_inode = NULL; + iput(btnc_inode); + } +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { @@ -762,7 +837,8 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 97769fe4d5885..21acd2d4b4d42 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -532,7 +532,7 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) goto out; ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, - &ii->i_btnode_cache); + ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -623,8 +623,9 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); - nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, + &shadow->frozen_btnodes); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 60b21b6eeac06..6c5148de84ffc 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -28,7 +28,7 @@ * @i_xattr: * @i_dir_start_lookup: page index of last successful search * @i_cno: checkpoint number for GC inode - * @i_btnode_cache: cached pages of b-tree nodes + * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) * @i_dirty: list for connecting dirty files * @xattr_sem: semaphore for extended attributes processing * @i_bh: buffer contains disk inode @@ -43,7 +43,7 @@ struct nilfs_inode_info { __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ - struct address_space i_btnode_cache; + struct inode *i_assoc_inode; struct list_head i_dirty; /* List for connecting dirty files */ #ifdef CONFIG_NILFS_XATTR @@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } -static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) -{ - struct nilfs_inode_info *ii = - container_of(btnc, struct nilfs_inode_info, i_btnode_cache); - return &ii->vfs_inode; -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -98,6 +91,7 @@ enum { NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_BTNC, /* inode for btree node cache */ }; /* @@ -267,6 +261,8 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino); extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); +int nilfs_attach_btree_node_cache(struct inode *inode); +void nilfs_detach_btree_node_cache(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 171fb5cd427fd..d1a148f0cae33 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -448,10 +448,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) /* * NILFS2 needs clear_page_dirty() in the following two cases: * - * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears - * page dirty flags when it copies back pages from the shadow cache - * (gcdat->{i_mapping,i_btnode_cache}) to its original cache - * (dat->{i_mapping,i_btnode_cache}). + * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty + * flag of pages when it copies back pages from shadow cache to the + * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 686c8ee7b29ce..314a23a166893 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -733,15 +733,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, struct list_head *listp) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct address_space *mapping = &ii->i_btnode_cache; + struct inode *btnc_inode = ii->i_assoc_inode; struct pagevec pvec; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; + if (!btnc_inode) + return; + pagevec_init(&pvec); - while (pagevec_lookup_tag(&pvec, mapping, &index, + while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); @@ -2410,7 +2413,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) continue; list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f6b2d280aab5a..2883ab625f61f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -157,7 +157,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->i_cno = 0; - nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); + ii->i_assoc_inode = NULL; + ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } @@ -1377,8 +1378,6 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - address_space_init_once(&ii->i_btnode_cache); - ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } -- GitLab From e156805ae285823bf76a543f963c624c40dee1c0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 1 Apr 2022 11:28:21 -0700 Subject: [PATCH 0043/4335] nilfs2: fix lockdep warnings during disk space reclamation [ Upstream commit 6e211930f79aa45d422009a5f2e5467d2369ffe5 ] During disk space reclamation, nilfs2 still emits the following lockdep warning due to page/folio operations on shadowed page caches that nilfs2 uses to get a snapshot of DAT file in memory: WARNING: CPU: 0 PID: 2643 at include/linux/backing-dev.h:272 __folio_mark_dirty+0x645/0x670 ... RIP: 0010:__folio_mark_dirty+0x645/0x670 ... Call Trace: filemap_dirty_folio+0x74/0xd0 __set_page_dirty_nobuffers+0x85/0xb0 nilfs_copy_dirty_pages+0x288/0x510 [nilfs2] nilfs_mdt_save_to_shadow_map+0x50/0xe0 [nilfs2] nilfs_clean_segments+0xee/0x5d0 [nilfs2] nilfs_ioctl_clean_segments.isra.19+0xb08/0xf40 [nilfs2] nilfs_ioctl+0xc52/0xfb0 [nilfs2] __x64_sys_ioctl+0x11d/0x170 This fixes the remaining warning by using inode objects to hold those page caches. Link: https://lkml.kernel.org/r/1647867427-30498-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Hao Sun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/dat.c | 4 ++- fs/nilfs2/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++--- fs/nilfs2/mdt.c | 38 +++++++++++++++++++--------- fs/nilfs2/mdt.h | 6 ++--- fs/nilfs2/nilfs.h | 2 ++ 5 files changed, 92 insertions(+), 21 deletions(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 8bccdf1158fce..1a3d183027b9e 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, di = NILFS_DAT_I(dat); lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); nilfs_palloc_setup_cache(dat, &di->palloc_cache); - nilfs_mdt_setup_shadow_map(dat, &di->shadow); + err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); + if (err) + goto failed; err = nilfs_read_inode_common(dat, raw_inode); if (err) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7b2bc41733131..2466f8b8be95c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -30,6 +30,7 @@ * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag * @for_btnc: inode for B-tree node cache flag + * @for_shadow: inode for shadowed page cache flag */ struct nilfs_iget_args { u64 ino; @@ -37,6 +38,7 @@ struct nilfs_iget_args { struct nilfs_root *root; bool for_gc; bool for_btnc; + bool for_shadow; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -317,7 +319,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -536,6 +538,12 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) } else if (args->for_btnc) { return 0; } + if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { + if (!args->for_shadow) + return 0; + } else if (args->for_shadow) { + return 0; + } if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -557,6 +565,8 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); if (args->for_btnc) NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); + if (args->for_shadow) + NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -565,7 +575,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -576,7 +586,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -608,7 +618,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, { struct nilfs_iget_args args = { .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; struct inode *inode; int err; @@ -655,6 +665,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.cno = ii->i_cno; args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; args.for_btnc = true; + args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -690,6 +701,50 @@ void nilfs_detach_btree_node_cache(struct inode *inode) } } +/** + * nilfs_iget_for_shadow - obtain inode for shadow mapping + * @inode: inode object that uses shadow mapping + * + * nilfs_iget_for_shadow() allocates a pair of inodes that holds page + * caches for shadow mapping. The page cache for data pages is set up + * in one inode and the one for b-tree node pages is set up in the + * other inode, which is attached to the former inode. + * + * Return Value: On success, a pointer to the inode for data pages is + * returned. On errors, one of the following negative error code is returned + * in a pointer type. + * + * %-ENOMEM - Insufficient memory available. + */ +struct inode *nilfs_iget_for_shadow(struct inode *inode) +{ + struct nilfs_iget_args args = { + .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = true + }; + struct inode *s_inode; + int err; + + s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!s_inode)) + return ERR_PTR(-ENOMEM); + if (!(s_inode->i_state & I_NEW)) + return inode; + + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { + iget_failed(s_inode); + return ERR_PTR(err); + } + unlock_new_inode(s_inode); + return s_inode; +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 21acd2d4b4d42..131b5add32eeb 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -470,9 +470,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); + + if (shadow) { + struct inode *s_inode = shadow->inode; + + shadow->inode = NULL; + iput(s_inode); + mdi->mi_shadow = NULL; + } } /** @@ -506,12 +515,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); - address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, inode); - address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, inode); + + s_inode = nilfs_iget_for_shadow(inode); + if (IS_ERR(s_inode)) + return PTR_ERR(s_inode); + + shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } @@ -525,13 +537,14 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *s_inode = shadow->inode; int ret; - ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; - ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -548,7 +561,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int blkbits = inode->i_blkbits; - page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); if (!page) return -ENOMEM; @@ -580,7 +593,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int n; - page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); if (page) { if (page_has_buffers(page)) { n = bh_offset(bh) >> inode->i_blkbits; @@ -621,11 +634,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); - nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, - &shadow->frozen_btnodes); + NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); @@ -640,10 +653,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); - truncate_inode_pages(&shadow->frozen_data, 0); - truncate_inode_pages(&shadow->frozen_btnodes, 0); + truncate_inode_pages(shadow->inode->i_mapping, 0); + truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index e77aea4bb921c..9d8ac0d27c16e 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -18,14 +18,12 @@ /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state - * @frozen_data: shadowed dirty data pages - * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; - struct address_space frozen_data; - struct address_space frozen_btnodes; + struct inode *inode; struct list_head frozen_buffers; }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 6c5148de84ffc..7dcb77d387593 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -92,6 +92,7 @@ enum { NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ NILFS_I_BTNC, /* inode for btree node cache */ + NILFS_I_SHADOW, /* inode for shadowed page cache */ }; /* @@ -263,6 +264,7 @@ extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); int nilfs_attach_btree_node_cache(struct inode *inode); void nilfs_detach_btree_node_cache(struct inode *inode); +struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); -- GitLab From 74f64e7d18bbabccb2c0cf1c54ce36ca3a5d9df8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 May 2022 12:31:12 +0200 Subject: [PATCH 0044/4335] ALSA: usb-audio: Restore Rane SL-1 quirk commit 5c62383c06837b5719cd5447a5758b791279e653 upstream. At cleaning up and moving the device rename from the quirk table to its own table, we removed the entry for Rane SL-1 as we thought it's only for renaming. It turned out, however, that the quirk is required for matching with the device that declares itself as no standard audio but only as vendor-specific. Restore the quirk entry for Rane SL-1 to fix the regression. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215887 Fixes: 5436f59bc5bc ("ALSA: usb-audio: Move device rename and profile quirks to an internal table") Cc: Link: https://lore.kernel.org/r/20220516103112.12950-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 0ea39565e6232..40a5e3eb4ef26 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3235,6 +3235,15 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Rane SL-1 */ +{ + USB_DEVICE(0x13e5, 0x0001), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + } +}, + /* disabled due to regression for other devices; * see https://bugzilla.kernel.org/show_bug.cgi?id=199905 */ -- GitLab From 48550a90e4089dc31ef4a69eccb16bece5bc12a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 10 May 2022 12:36:26 +0200 Subject: [PATCH 0045/4335] ALSA: wavefront: Proper check of get_user() error commit a34ae6c0660d3b96b0055f68ef74dc9478852245 upstream. The antient ISA wavefront driver reads its sample patch data (uploaded over an ioctl) via __get_user() with no good reason; likely just for some performance optimizations in the past. Let's change this to the standard get_user() and the error check for handling the fault case properly. Reported-by: Linus Torvalds Cc: Link: https://lore.kernel.org/r/20220510103626.16635-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/wavefront/wavefront_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 69cbc79fbb716..2aaaa68071744 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev, if (dataptr < data_end) { - __get_user (sample_short, dataptr); + if (get_user(sample_short, dataptr)) + return -EFAULT; dataptr += skip; if (data_is_unsigned) { /* GUS ? */ -- GitLab From f380aba421990e04db89f14d8cf12b7e33cc76c3 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 12 May 2022 20:09:56 +0200 Subject: [PATCH 0046/4335] ALSA: hda/realtek: Add quirk for TongFang devices with pop noise commit 8b3b2392ed68bcd17c7eb84ca615ce1e5f115b99 upstream. When audio stops playing there is an audible "pop"-noise when using headphones on the TongFang GMxMRxx, GKxNRxx, GMxZGxx, GMxTGxx and GMxAGxx. This quirk fixes this mostly. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220512180956.281804-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b2e98f15a4567..1880e30341a07 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9076,6 +9076,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), + SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), -- GitLab From e085354dde254bc6c83ee604ea66c2b36f9f9067 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 May 2022 20:38:06 +0200 Subject: [PATCH 0047/4335] perf: Fix sys_perf_event_open() race against self commit 3ac6487e584a1eb54071dbe1212e05b884136704 upstream. Norbert reported that it's possible to race sys_perf_event_open() such that the looser ends up in another context from the group leader, triggering many WARNs. The move_group case checks for races against itself, but the !move_group case doesn't, seemingly relying on the previous group_leader->ctx == ctx check. However, that check is racy due to not holding any locks at that time. Therefore, re-check the result after acquiring locks and bailing if they no longer match. Additionally, clarify the not_move_group case from the move_group-vs-move_group race. Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") Reported-by: Norbert Slusarek Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c891a8eb3234..565910de92e9b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12283,6 +12283,9 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different task * or CPU context. If we're moving SW events, we'll fix * this up later, so allow that. + * + * Racy, not holding group_leader->ctx->mutex, see comment with + * perf_event_ctx_lock(). */ if (!move_group && group_leader->ctx != ctx) goto err_context; @@ -12348,6 +12351,7 @@ SYSCALL_DEFINE5(perf_event_open, } else { perf_event_ctx_unlock(group_leader, gctx); move_group = 0; + goto not_move_group; } } @@ -12364,7 +12368,17 @@ SYSCALL_DEFINE5(perf_event_open, } } else { mutex_lock(&ctx->mutex); + + /* + * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, + * see the group_leader && !move_group test earlier. + */ + if (group_leader && group_leader->ctx != ctx) { + err = -EINVAL; + goto err_locked; + } } +not_move_group: if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; -- GitLab From 0f71433eb705c0c472290a367add222e8de58d1f Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 17 May 2022 14:08:16 +0200 Subject: [PATCH 0048/4335] selinux: fix bad cleanup on error in hashtab_duplicate() commit 6254bd3db316c9ccb3b05caa8b438be63245466f upstream. The code attempts to free the 'new' pointer using kmem_cache_free(), which is wrong because this function isn't responsible of freeing it. Instead, the function should free new->htable and clear the contents of *new (to prevent double-free). Cc: stable@vger.kernel.org Fixes: c7c556f1e81b ("selinux: refactor changing booleans") Reported-by: Wander Lairson Costa Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/hashtab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index a91fb0ed00de3..298098bb9c06d 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -178,7 +178,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, kmem_cache_free(hashtab_node_cachep, cur); } } - kmem_cache_free(hashtab_node_cachep, new); + kfree(new->htable); + memset(new, 0, sizeof(*new)); return -ENOMEM; } -- GitLab From 42d8a6dc45fc6619b8def1a70b7bd0800bcc4574 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 May 2022 16:42:13 +0800 Subject: [PATCH 0049/4335] Fix double fget() in vhost_net_set_backend() commit fb4554c2232e44d595920f4d5c66cf8f7d13f9bc upstream. Descriptor table is a shared resource; two fget() on the same descriptor may return different struct file references. get_tap_ptr_ring() is called after we'd found (and pinned) the socket we'll be using and it tries to find the private tun/tap data structures associated with it. Redoing the lookup by the same file descriptor we'd used to get the socket is racy - we need to same struct file. Thanks to Jason for spotting a braino in the original variant of patch - I'd missed the use of fd == -1 for disabling backend, and in that case we can end up with sock == NULL and sock != oldsock. Cc: stable@kernel.org Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 792ab5f236471..297b5db474545 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1450,13 +1450,9 @@ err: return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; -- GitLab From f5012bad435d4c12681cc4049b5c19430b9ae27f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Mar 2022 19:38:51 +0200 Subject: [PATCH 0050/4335] PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold commit 92597f97a40bf661bebceb92e26ff87c76d562d4 upstream. If a Root Port on Elo i2 is put into D3cold and then back into D0, the downstream device becomes permanently inaccessible, so add a bridge D3 DMI quirk for that system. This was exposed by 14858dcc3b35 ("PCI: Use pci_update_current_state() in pci_enable_device_flags()"), but before that commit the Root Port in question had never been put into D3cold for real due to a mismatch between its power state retrieved from the PCI_PM_CTRL register (which was accessible even though the platform firmware indicated that the port was in D3cold) and the state of an ACPI power resource involved in its power management. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215715 Link: https://lore.kernel.org/r/11980172.O9o76ZdvQC@kreacher Reported-by: Stefan Gottwald Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a101faf3e88a9..0380543d10fdc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2888,6 +2888,16 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), }, + /* + * Downstream device is not accessible after putting a root port + * into D3cold and back into D0 on Elo i2. + */ + .ident = "Elo i2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), + DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + }, }, #endif { } -- GitLab From 9f8f09d92ab626d7330670bd48aefe895b996d2b Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 12 May 2022 15:41:43 +0300 Subject: [PATCH 0051/4335] Revert "can: m_can: pci: use custom bit timings for Elkhart Lake" commit 14ea4a470494528c7e88da5c4116c24eb027059f upstream. This reverts commit 0e8ffdf3b86dfd44b651f91b12fcae76c25c453b. Commit 0e8ffdf3b86d ("can: m_can: pci: use custom bit timings for Elkhart Lake") broke the test case using bitrate switching. | ip link set can0 up type can bitrate 500000 dbitrate 4000000 fd on | ip link set can1 up type can bitrate 500000 dbitrate 4000000 fd on | candump can0 & | cangen can1 -I 0x800 -L 64 -e -fb \ | -D 11223344deadbeef55667788feedf00daabbccdd44332211 -n 1 -v -v Above commit does everything correctly according to the datasheet. However datasheet wasn't correct. I got confirmation from hardware engineers that the actual CAN hardware on Intel Elkhart Lake is based on M_CAN version v3.2.0. Datasheet was mirroring values from an another specification which was based on earlier M_CAN version leading to wrong bit timings. Therefore revert the commit and switch back to common bit timings. Fixes: ea4c1787685d ("can: m_can: pci: use custom bit timings for Elkhart Lake") Link: https://lore.kernel.org/all/20220512124144.536850-1-jarkko.nikula@linux.intel.com Signed-off-by: Jarkko Nikula Reported-by: Chee Hou Ong Reported-by: Aman Kumar Reported-by: Pallavi Kumari Cc: # v5.16+ Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can_pci.c | 48 +++---------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index b56a54d6c5a9c..8f184a852a0a7 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,14 +18,9 @@ #define M_CAN_PCI_MMIO_BAR 0 +#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 -struct m_can_pci_config { - const struct can_bittiming_const *bit_timing; - const struct can_bittiming_const *data_timing; - unsigned int clock_freq; -}; - struct m_can_pci_priv { struct m_can_classdev cdev; @@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = { .read_fifo = iomap_read_fifo, }; -static const struct can_bittiming_const m_can_bittiming_const_ehl = { - .name = KBUILD_MODNAME, - .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ - .tseg1_max = 64, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ - .tseg2_max = 128, - .sjw_max = 128, - .brp_min = 1, - .brp_max = 512, - .brp_inc = 1, -}; - -static const struct can_bittiming_const m_can_data_bittiming_const_ehl = { - .name = KBUILD_MODNAME, - .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ - .tseg1_max = 16, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - -static const struct m_can_pci_config m_can_pci_ehl = { - .bit_timing = &m_can_bittiming_const_ehl, - .data_timing = &m_can_data_bittiming_const_ehl, - .clock_freq = 200000000, -}; - static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) { struct device *dev = &pci->dev; - const struct m_can_pci_config *cfg; struct m_can_classdev *mcan_class; struct m_can_pci_priv *priv; void __iomem *base; @@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (!mcan_class) return -ENOMEM; - cfg = (const struct m_can_pci_config *)id->driver_data; - priv = cdev_to_priv(mcan_class); priv->base = base; @@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); mcan_class->pm_clock_support = 1; - mcan_class->bit_timing = cfg->bit_timing; - mcan_class->data_timing = cfg->data_timing; - mcan_class->can.clock.freq = cfg->clock_freq; + mcan_class->can.clock.freq = id->driver_data; mcan_class->ops = &m_can_pci_ops; pci_set_drvdata(pci, mcan_class); @@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops, m_can_pci_suspend, m_can_pci_resume); static const struct pci_device_id m_can_pci_id_table[] = { - { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, }, - { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, }, + { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, }, + { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, }, { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(pci, m_can_pci_id_table); -- GitLab From a3c0ba7870b9d4197949ef4606276bc687318104 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 May 2022 14:51:22 +0000 Subject: [PATCH 0052/4335] KVM: x86/mmu: Update number of zapped pages even if page list is stable commit b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 upstream. When zapping obsolete pages, update the running count of zapped pages regardless of whether or not the list has become unstable due to zapping a shadow page with its own child shadow pages. If the VM is backed by mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping the batch count and thus without yielding. In the worst case scenario, this can cause a soft lokcup. watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [dirty_log_perf_:13020] RIP: 0010:workingset_activation+0x19/0x130 mark_page_accessed+0x266/0x2e0 kvm_set_pfn_accessed+0x31/0x40 mmu_spte_clear_track_bits+0x136/0x1c0 drop_spte+0x1a/0xc0 mmu_page_zap_pte+0xef/0x120 __kvm_mmu_prepare_zap_page+0x205/0x5e0 kvm_mmu_zap_all_fast+0xd7/0x190 kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10 kvm_page_track_flush_slot+0x5c/0x80 kvm_arch_flush_shadow_memslot+0xe/0x10 kvm_set_memslot+0x1a8/0x5d0 __kvm_set_memory_region+0x337/0x590 kvm_vm_ioctl+0xb08/0x1040 Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""") Reported-by: David Matlack Reviewed-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220511145122.3133334-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 806f9d42bcce8..c5fd00945e752 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5590,6 +5590,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5621,11 +5622,12 @@ restart: goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* -- GitLab From e9ea44dc030deb9202f3c7d6340f9ad5b6035051 Mon Sep 17 00:00:00 2001 From: Prakruthi Deepak Heragu Date: Fri, 13 May 2022 10:46:54 -0700 Subject: [PATCH 0053/4335] arm64: paravirt: Use RCU read locks to guard stolen_time commit 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef upstream. During hotplug, the stolen time data structure is unmapped and memset. There is a possibility of the timer IRQ being triggered before memset and stolen time is getting updated as part of this timer IRQ handler. This causes the below crash in timer handler - [ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148 ... [ 3458.154398][ C5] Call trace: [ 3458.157648][ C5] para_steal_clock+0x30/0x50 [ 3458.162319][ C5] irqtime_account_process_tick+0x30/0x194 [ 3458.168148][ C5] account_process_tick+0x3c/0x280 [ 3458.173274][ C5] update_process_times+0x5c/0xf4 [ 3458.178311][ C5] tick_sched_timer+0x180/0x384 [ 3458.183164][ C5] __run_hrtimer+0x160/0x57c [ 3458.187744][ C5] hrtimer_interrupt+0x258/0x684 [ 3458.192698][ C5] arch_timer_handler_virt+0x5c/0xa0 [ 3458.198002][ C5] handle_percpu_devid_irq+0xdc/0x414 [ 3458.203385][ C5] handle_domain_irq+0xa8/0x168 [ 3458.208241][ C5] gic_handle_irq.34493+0x54/0x244 [ 3458.213359][ C5] call_on_irq_stack+0x40/0x70 [ 3458.218125][ C5] do_interrupt_handler+0x60/0x9c [ 3458.223156][ C5] el1_interrupt+0x34/0x64 [ 3458.227560][ C5] el1h_64_irq_handler+0x1c/0x2c [ 3458.232503][ C5] el1h_64_irq+0x7c/0x80 [ 3458.236736][ C5] free_vmap_area_noflush+0x108/0x39c [ 3458.242126][ C5] remove_vm_area+0xbc/0x118 [ 3458.246714][ C5] vm_remove_mappings+0x48/0x2a4 [ 3458.251656][ C5] __vunmap+0x154/0x278 [ 3458.255796][ C5] stolen_time_cpu_down_prepare+0xc0/0xd8 [ 3458.261542][ C5] cpuhp_invoke_callback+0x248/0xc34 [ 3458.266842][ C5] cpuhp_thread_fun+0x1c4/0x248 [ 3458.271696][ C5] smpboot_thread_fn+0x1b0/0x400 [ 3458.276638][ C5] kthread+0x17c/0x1e0 [ 3458.280691][ C5] ret_from_fork+0x10/0x20 As a fix, introduce rcu lock to update stolen time structure. Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online") Cc: stable@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Prakruthi Deepak Heragu Signed-off-by: Elliot Berman Reviewed-by: Srivatsa S. Bhat (VMware) Link: https://lore.kernel.org/r/20220513174654.362169-1-quic_eberman@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/paravirt.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 75fed4460407d..57c7c211f8c71 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu) DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); struct pv_time_stolen_time_region { - struct pvclock_vcpu_stolen_time *kaddr; + struct pvclock_vcpu_stolen_time __rcu *kaddr; }; static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc); /* return stolen time in ns by asking the hypervisor */ static u64 para_steal_clock(int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; + u64 ret = 0; reg = per_cpu_ptr(&stolen_time_region, cpu); @@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu) * online notification callback runs. Until the callback * has run we just return zero. */ - if (!reg->kaddr) + rcu_read_lock(); + kaddr = rcu_dereference(reg->kaddr); + if (!kaddr) { + rcu_read_unlock(); return 0; + } - return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); + ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time)); + rcu_read_unlock(); + return ret; } static int stolen_time_cpu_down_prepare(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; reg = this_cpu_ptr(&stolen_time_region); if (!reg->kaddr) return 0; - memunmap(reg->kaddr); - memset(reg, 0, sizeof(*reg)); + kaddr = rcu_replace_pointer(reg->kaddr, NULL, true); + synchronize_rcu(); + memunmap(kaddr); return 0; } static int stolen_time_cpu_online(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; struct arm_smccc_res res; @@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu) if (res.a0 == SMCCC_RET_NOT_SUPPORTED) return -EINVAL; - reg->kaddr = memremap(res.a0, + kaddr = memremap(res.a0, sizeof(struct pvclock_vcpu_stolen_time), MEMREMAP_WB); + rcu_assign_pointer(reg->kaddr, kaddr); + if (!reg->kaddr) { pr_warn("Failed to map stolen time data structure\n"); return -ENOMEM; } - if (le32_to_cpu(reg->kaddr->revision) != 0 || - le32_to_cpu(reg->kaddr->attributes) != 0) { + if (le32_to_cpu(kaddr->revision) != 0 || + le32_to_cpu(kaddr->attributes) != 0) { pr_warn_once("Unexpected revision or attributes in stolen time data\n"); return -ENXIO; } -- GitLab From c42f9a5a3aaeaee9dde94377a39054b2331f55fb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 May 2022 10:35:32 +0100 Subject: [PATCH 0054/4335] arm64: mte: Ensure the cleared tags are visible before setting the PTE commit 1d0cb4c8864addc362bae98e8ffa5500c87e1227 upstream. As an optimisation, only pages mapped with PROT_MTE in user space have the MTE tags zeroed. This is done lazily at the set_pte_at() time via mte_sync_tags(). However, this function is missing a barrier and another CPU may see the PTE updated before the zeroed tags are visible. Add an smp_wmb() barrier if the mapping is Normal Tagged. Signed-off-by: Catalin Marinas Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE") Cc: # 5.10.x Reported-by: Vladimir Murzin Cc: Will Deacon Reviewed-by: Steven Price Tested-by: Vladimir Murzin Link: https://lore.kernel.org/r/20220517093532.127095-1-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/mte.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index e5e801bc53122..7c1c82c8115cc 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -73,6 +73,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); } + + /* ensure the tags are visible before the PTE is set */ + smp_wmb(); } int memcmp_pages(struct page *page1, struct page *page2) -- GitLab From 8a06f25f5941c145773204f2f7abef95b4ffb8ce Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 3 May 2022 13:50:10 +0200 Subject: [PATCH 0055/4335] crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ commit 16287397ec5c08aa58db6acf7dbc55470d78087d upstream. The commit referenced in the Fixes tag removed the 'break' from the else branch in qcom_rng_read(), causing an infinite loop whenever 'max' is not a multiple of WORD_SZ. This can be reproduced e.g. by running: kcapi-rng -b 67 >/dev/null There are many ways to fix this without adding back the 'break', but they all seem more awkward than simply adding it back, so do just that. Tested on a machine with Qualcomm Amberwing processor. Fixes: a680b1832ced ("crypto: qcom-rng - ensure buffer for generate is completely filled") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Reviewed-by: Brian Masney Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qcom-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 11f30fd48c141..031b5f701a0a3 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); + break; } } while (currsize < max); -- GitLab From 24501d51c6b40d5a87dfa6c07103c7b5b3e74822 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 14 May 2022 12:16:47 +0200 Subject: [PATCH 0056/4335] libceph: fix potential use-after-free on linger ping and resends commit 75dbb685f4e8786c33ddef8279bab0eadfb0731f upstream. request_reinit() is not only ugly as the comment rightfully suggests, but also unsafe. Even though it is called with osdc->lock held for write in all cases, resetting the OSD request refcount can still race with handle_reply() and result in use-after-free. Taking linger ping as an example: handle_timeout thread handle_reply thread down_read(&osdc->lock) req = lookup_request(...) ... finish_request(req) # unregisters up_read(&osdc->lock) __complete_request(req) linger_ping_cb(req) # req->r_kref == 2 because handle_reply still holds its ref down_write(&osdc->lock) send_linger_ping(lreq) req = lreq->ping_req # same req # cancel_linger_request is NOT # called - handle_reply already # unregistered request_reinit(req) WARN_ON(req->r_kref != 1) # fires request_init(req) kref_init(req->r_kref) # req->r_kref == 1 after kref_init ceph_osdc_put_request(req) kref_put(req->r_kref) # req->r_kref == 0 after kref_put, req is freed !!! This happens because send_linger_ping() always (re)uses the same OSD request for watch ping requests, relying on cancel_linger_request() to unregister it from the OSD client and rip its messages out from the messenger. send_linger() does the same for watch/notify registration and watch reconnect requests. Unfortunately cancel_request() doesn't guarantee that after it returns the OSD client would be completely done with the OSD request -- a ref could still be held and the callback (if specified) could still be invoked too. The original motivation for request_reinit() was inability to deal with allocation failures in send_linger() and send_linger_ping(). Switching to using osdc->req_mempool (currently only used by CephFS) respects that and allows us to get rid of request_reinit(). Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li Acked-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/osd_client.h | 3 + net/ceph/osd_client.c | 302 +++++++++++++------------------- 2 files changed, 122 insertions(+), 183 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 83fa08a065071..787fff5ec7f58 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -287,6 +287,9 @@ struct ceph_osd_linger_request { rados_watcherrcb_t errcb; void *data; + struct ceph_pagelist *request_pl; + struct page **notify_id_pages; + struct page ***preply_pages; size_t *preply_len; }; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ff8624a7c9643..f6b7436458aeb 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req) target_init(&req->r_t); } -/* - * This is ugly, but it allows us to reuse linger registration and ping - * requests, keeping the structure of the code around send_linger{_ping}() - * reasonable. Setting up a min_nr=2 mempool for each linger request - * and dealing with copying ops (this blasts req only, watch op remains - * intact) isn't any better. - */ -static void request_reinit(struct ceph_osd_request *req) -{ - struct ceph_osd_client *osdc = req->r_osdc; - bool mempool = req->r_mempool; - unsigned int num_ops = req->r_num_ops; - u64 snapid = req->r_snapid; - struct ceph_snap_context *snapc = req->r_snapc; - bool linger = req->r_linger; - struct ceph_msg *request_msg = req->r_request; - struct ceph_msg *reply_msg = req->r_reply; - - dout("%s req %p\n", __func__, req); - WARN_ON(kref_read(&req->r_kref) != 1); - request_release_checks(req); - - WARN_ON(kref_read(&request_msg->kref) != 1); - WARN_ON(kref_read(&reply_msg->kref) != 1); - target_destroy(&req->r_t); - - request_init(req); - req->r_osdc = osdc; - req->r_mempool = mempool; - req->r_num_ops = num_ops; - req->r_snapid = snapid; - req->r_snapc = snapc; - req->r_linger = linger; - req->r_request = request_msg; - req->r_reply = reply_msg; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_ops, @@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init); * @watch_opcode: CEPH_OSD_WATCH_OP_* */ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, - u64 cookie, u8 watch_opcode) + u8 watch_opcode, u64 cookie, u32 gen) { struct ceph_osd_req_op *op; op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; - op->watch.gen = 0; + op->watch.gen = gen; +} + +/* + * prot_ver, timeout and notify payload (may be empty) should already be + * encoded in @request_pl + */ +static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, + u64 cookie, struct ceph_pagelist *request_pl) +{ + struct ceph_osd_req_op *op; + + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); + op->notify.cookie = cookie; + + ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); + op->indata_len = request_pl->length; } /* @@ -2727,10 +2706,13 @@ static void linger_release(struct kref *kref) WARN_ON(!list_empty(&lreq->pending_lworks)); WARN_ON(lreq->osd); - if (lreq->reg_req) - ceph_osdc_put_request(lreq->reg_req); - if (lreq->ping_req) - ceph_osdc_put_request(lreq->ping_req); + if (lreq->request_pl) + ceph_pagelist_release(lreq->request_pl); + if (lreq->notify_id_pages) + ceph_release_page_vector(lreq->notify_id_pages, 1); + + ceph_osdc_put_request(lreq->reg_req); + ceph_osdc_put_request(lreq->ping_req); target_destroy(&lreq->t); kfree(lreq); } @@ -2999,6 +2981,12 @@ static void linger_commit_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); linger_reg_commit_complete(lreq, req->r_result); @@ -3022,6 +3010,7 @@ static void linger_commit_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3044,6 +3033,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->last_error); if (req->r_result < 0) { @@ -3053,46 +3048,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } static void send_linger(struct ceph_osd_linger_request *lreq) { - struct ceph_osd_request *req = lreq->reg_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_client *osdc = lreq->osdc; + struct ceph_osd_request *req; + int ret; - verify_osdc_wrlocked(req->r_osdc); + verify_osdc_wrlocked(osdc); + mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->reg_req) { + if (lreq->reg_req->r_osd) + cancel_linger_request(lreq->reg_req); + ceph_osdc_put_request(lreq->reg_req); + } + + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - request_reinit(req); target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; - mutex_lock(&lreq->lock); if (lreq->is_watch && lreq->committed) { - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id); - op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT; - op->watch.gen = ++lreq->register_gen; + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, + lreq->linger_id, ++lreq->register_gen); dout("lreq %p reconnect register_gen %u\n", lreq, - op->watch.gen); + req->r_ops[0].watch.gen); req->r_callback = linger_reconnect_cb; } else { - if (!lreq->is_watch) + if (lreq->is_watch) { + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, + lreq->linger_id, 0); + } else { lreq->notify_id = 0; - else - WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH); + + refcount_inc(&lreq->request_pl->refcnt); + osd_req_op_notify_init(req, 0, lreq->linger_id, + lreq->request_pl); + ceph_osd_data_pages_init( + osd_req_op_data(req, 0, notify, response_data), + lreq->notify_id_pages, PAGE_SIZE, 0, false, false); + } dout("lreq %p register\n", lreq); req->r_callback = linger_commit_cb; } - mutex_unlock(&lreq->lock); + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->reg_req = req; + mutex_unlock(&lreq->lock); submit_request(req, true); } @@ -3102,6 +3115,12 @@ static void linger_ping_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->ping_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->ping_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, lreq->last_error); @@ -3117,6 +3136,7 @@ static void linger_ping_cb(struct ceph_osd_request *req) lreq->register_gen, req->r_ops[0].watch.gen); } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3124,8 +3144,8 @@ static void linger_ping_cb(struct ceph_osd_request *req) static void send_linger_ping(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; - struct ceph_osd_request *req = lreq->ping_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_request *req; + int ret; if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); @@ -3137,19 +3157,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq) __func__, lreq, lreq->linger_id, lreq->ping_sent, lreq->register_gen); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->ping_req) { + if (lreq->ping_req->r_osd) + cancel_linger_request(lreq->ping_req); + ceph_osdc_put_request(lreq->ping_req); + } - request_reinit(req); - target_copy(&req->r_t, &lreq->t); + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id || - op->watch.op != CEPH_OSD_WATCH_OP_PING); - op->watch.gen = lreq->register_gen; + target_copy(&req->r_t, &lreq->t); + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, + lreq->register_gen); req->r_callback = linger_ping_cb; + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); + req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->ping_req = req; ceph_osdc_get_request(req); account_request(req); @@ -3165,12 +3192,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq) down_write(&osdc->lock); linger_register(lreq); - if (lreq->is_watch) { - lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; - lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; - } else { - lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; - } calc_target(osdc, &lreq->t, false); osd = lookup_create_osd(osdc, lreq->t.osd, true); @@ -3202,9 +3223,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) */ static void __linger_cancel(struct ceph_osd_linger_request *lreq) { - if (lreq->is_watch && lreq->ping_req->r_osd) + if (lreq->ping_req && lreq->ping_req->r_osd) cancel_linger_request(lreq->ping_req); - if (lreq->reg_req->r_osd) + if (lreq->reg_req && lreq->reg_req->r_osd) cancel_linger_request(lreq->reg_req); cancel_linger_map_check(lreq); unlink_linger(lreq->osd, lreq); @@ -4653,43 +4674,6 @@ again: } EXPORT_SYMBOL(ceph_osdc_sync); -static struct ceph_osd_request * -alloc_linger_request(struct ceph_osd_linger_request *lreq) -{ - struct ceph_osd_request *req; - - req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO); - if (!req) - return NULL; - - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - return req; -} - -static struct ceph_osd_request * -alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) -{ - struct ceph_osd_request *req; - - req = alloc_linger_request(lreq); - if (!req) - return NULL; - - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - osd_req_op_watch_init(req, 0, 0, watch_opcode); - - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { - ceph_osdc_put_request(req); - return NULL; - } - - return req; -} - /* * Returns a handle, caller owns a ref. */ @@ -4719,18 +4703,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, lreq->t.flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&lreq->mtime); - lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); - if (!lreq->reg_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - - lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); - if (!lreq->ping_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { @@ -4768,8 +4740,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&req->r_mtime); - osd_req_op_watch_init(req, 0, lreq->linger_id, - CEPH_OSD_WATCH_OP_UNWATCH); + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, + lreq->linger_id, 0); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) @@ -4855,35 +4827,6 @@ out_put_req: } EXPORT_SYMBOL(ceph_osdc_notify_ack); -static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, - u64 cookie, u32 prot_ver, u32 timeout, - void *payload, u32 payload_len) -{ - struct ceph_osd_req_op *op; - struct ceph_pagelist *pl; - int ret; - - op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); - op->notify.cookie = cookie; - - pl = ceph_pagelist_alloc(GFP_NOIO); - if (!pl) - return -ENOMEM; - - ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ - ret |= ceph_pagelist_encode_32(pl, timeout); - ret |= ceph_pagelist_encode_32(pl, payload_len); - ret |= ceph_pagelist_append(pl, payload, payload_len); - if (ret) { - ceph_pagelist_release(pl); - return -ENOMEM; - } - - ceph_osd_data_pagelist_init(&op->notify.request_data, pl); - op->indata_len = pl->length; - return 0; -} - /* * @timeout: in seconds * @@ -4902,7 +4845,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, size_t *preply_len) { struct ceph_osd_linger_request *lreq; - struct page **pages; int ret; WARN_ON(!timeout); @@ -4915,41 +4857,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, if (!lreq) return -ENOMEM; - lreq->preply_pages = preply_pages; - lreq->preply_len = preply_len; - - ceph_oid_copy(&lreq->t.base_oid, oid); - ceph_oloc_copy(&lreq->t.base_oloc, oloc); - lreq->t.flags = CEPH_OSD_FLAG_READ; - - lreq->reg_req = alloc_linger_request(lreq); - if (!lreq->reg_req) { + lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); + if (!lreq->request_pl) { ret = -ENOMEM; goto out_put_lreq; } - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, - payload, payload_len); - if (ret) + ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ + ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); + ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); + ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); + if (ret) { + ret = -ENOMEM; goto out_put_lreq; + } /* for notify_id */ - pages = ceph_alloc_page_vector(1, GFP_NOIO); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); + lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); + if (IS_ERR(lreq->notify_id_pages)) { + ret = PTR_ERR(lreq->notify_id_pages); + lreq->notify_id_pages = NULL; goto out_put_lreq; } - ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, - response_data), - pages, PAGE_SIZE, 0, false, true); - ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); - if (ret) - goto out_put_lreq; + lreq->preply_pages = preply_pages; + lreq->preply_len = preply_len; + + ceph_oid_copy(&lreq->t.base_oid, oid); + ceph_oloc_copy(&lreq->t.base_oloc, oloc); + lreq->t.flags = CEPH_OSD_FLAG_READ; linger_submit(lreq); ret = linger_reg_commit_wait(lreq); -- GitLab From 9d3ec4e5bf032f53137e11075846cec9c474ce4b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 17 May 2022 12:00:37 -0500 Subject: [PATCH 0057/4335] drm/amd: Don't reset dGPUs if the system is going to s2idle commit 7123d39dc24dcd21ff23d75f46f926b15269b9da upstream. An A+A configuration on ASUS ROG Strix G513QY proves that the ASIC reset for handling aborted suspend can't work with s2idle. This functionality was introduced in commit daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)"). A few other commits have gone on top of the ASIC reset, but this still doesn't work on the A+A configuration in s2idle. Avoid doing the reset on dGPUs specifically when using s2idle. Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2008 Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 14 ++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7e73ac6fb21db..2eebefd26fa82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1411,9 +1411,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } +static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 0e12315fa0cb8..98ac53ee6bb55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1045,6 +1045,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) (pm_suspend_target_state == PM_SUSPEND_MEM); } +/** + * amdgpu_acpi_should_gpu_reset + * + * @adev: amdgpu_device_pointer + * + * returns true if should reset GPU, false if not + */ +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; +} + /** * amdgpu_acpi_is_s0ix_active * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2bd7b9fe6005f..129661f728bf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2259,7 +2259,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - if (!adev->in_s0ix) + if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); return 0; -- GitLab From aaf0f01d074d6fd39ec1b01477f69cd688bf6c9d Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 10 May 2022 17:08:47 -0700 Subject: [PATCH 0058/4335] drm/i915/dmc: Add MMIO range restrictions commit 54395a33718af1c04b5098203335b25382291a16 upstream. Bspec has added some steps that check forDMC MMIO range before programming them v2: Fix for CI v3: move register defines to .h (Anusha) - Check MMIO restrictions per pipe - Add MMIO restricton for v1 dmc header as well (Lucas) v4: s/_PICK/_PICK_EVEN and use it only for Pipe DMC scenario. - clean up sanity check logic.(Lucas) - Add MMIO range for RKL as well.(Anusha) v5: Use DISPLAY_VER instead of per platform check (Lucas) BSpec: 49193 Cc: stable@vger.kernel.org Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220511000847.1068302-1-anusha.srivatsa@intel.com (cherry picked from commit 21c47196aec3a93f913a7515e1e7b30e6c54d6c6) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dmc.c | 44 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 16 +++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 73076737add75..0e04d4dd1c132 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -375,6 +375,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc, } } +static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc, + const u32 *mmioaddr, u32 mmio_count, + int header_ver, u8 dmc_id) +{ + struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); + u32 start_range, end_range; + int i; + + if (dmc_id >= DMC_FW_MAX) { + drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id); + return false; + } + + if (header_ver == 1) { + start_range = DMC_MMIO_START_RANGE; + end_range = DMC_MMIO_END_RANGE; + } else if (dmc_id == DMC_FW_MAIN) { + start_range = TGL_MAIN_MMIO_START; + end_range = TGL_MAIN_MMIO_END; + } else if (DISPLAY_VER(i915) >= 13) { + start_range = ADLP_PIPE_MMIO_START; + end_range = ADLP_PIPE_MMIO_END; + } else if (DISPLAY_VER(i915) >= 12) { + start_range = TGL_PIPE_MMIO_START(dmc_id); + end_range = TGL_PIPE_MMIO_END(dmc_id); + } else { + drm_warn(&i915->drm, "Unknown mmio range for sanity check"); + return false; + } + + for (i = 0; i < mmio_count; i++) { + if (mmioaddr[i] < start_range || mmioaddr[i] > end_range) + return false; + } + + return true; +} + static u32 parse_dmc_fw_header(struct intel_dmc *dmc, const struct intel_dmc_header_base *dmc_header, size_t rem_size, u8 dmc_id) @@ -444,6 +482,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, return 0; } + if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count, + dmc_header->header_ver, dmc_id)) { + drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n"); + return 0; + } + for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bb64e7baa1cc1..3c70aa5229e5a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7818,6 +7818,22 @@ enum { /* MMIO address range for DMC program (0x80000 - 0x82FFF) */ #define DMC_MMIO_START_RANGE 0x80000 #define DMC_MMIO_END_RANGE 0x8FFFF +#define DMC_V1_MMIO_START_RANGE 0x80000 +#define TGL_MAIN_MMIO_START 0x8F000 +#define TGL_MAIN_MMIO_END 0x8FFFF +#define _TGL_PIPEA_MMIO_START 0x92000 +#define _TGL_PIPEA_MMIO_END 0x93FFF +#define _TGL_PIPEB_MMIO_START 0x96000 +#define _TGL_PIPEB_MMIO_END 0x97FFF +#define ADLP_PIPE_MMIO_START 0x5F000 +#define ADLP_PIPE_MMIO_END 0x5FFFF + +#define TGL_PIPE_MMIO_START(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\ + _TGL_PIPEB_MMIO_START) + +#define TGL_PIPE_MMIO_END(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\ + _TGL_PIPEB_MMIO_END) + #define SKL_DMC_DC3_DC5_COUNT _MMIO(0x80030) #define SKL_DMC_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_DMC_DC3_DC5_COUNT _MMIO(0x80038) -- GitLab From deec86168170d085d4f91445c1f72a900ed02372 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Mon, 16 May 2022 11:20:42 +0800 Subject: [PATCH 0059/4335] drm/dp/mst: fix a possible memory leak in fetch_monitor_name() commit 6e03b13cc7d9427c2c77feed1549191015615202 upstream. drm_dp_mst_get_edid call kmemdup to create mst_edid. So mst_edid need to be freed after use. Signed-off-by: Hangyu Hua Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220516032042.13166-1-hbh25y@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 86d13d6bc4631..b3e3babe18c0e 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4834,6 +4834,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr, mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port); drm_edid_get_monitor_name(mst_edid, name, namelen); + kfree(mst_edid); } /** -- GitLab From 6a4c06e265eaf016081e11991b39b7bfb6aea832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 17 May 2022 09:27:08 +0200 Subject: [PATCH 0060/4335] dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 upstream. The typedefs u32 and u64 are not available in userspace. Thus user get an error he try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B: $ gcc -Wall -c -MMD -c -o ioctls_list.o ioctls_list.c In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1, from /usr/include/linux/ioctl.h:5, from /usr/include/asm-generic/ioctls.h:5, from ioctls_list.c:11: ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function) 463 | { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function) 464 | { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ The issue was initially reported here[1]. [1]: https://github.com/jerome-pouiller/ioctl/pull/14 Signed-off-by: Jérôme Pouiller Reviewed-by: Christian König Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi") CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 8e4a2ca0bcbf7..b1523cb8ab307 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -92,7 +92,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif -- GitLab From 250f71a575591c9a399fc07b2bbfa34a2be06d1a Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Fri, 13 May 2022 16:58:16 +0530 Subject: [PATCH 0061/4335] dma-buf: ensure unique directory name for dmabuf stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 370704e707a5f2d3c9a1d4ed8bd8cd67507d7bb5 upstream. The dmabuf file uses get_next_ino()(through dma_buf_getfile() -> alloc_anon_inode()) to get an inode number and uses the same as a directory name under /sys/kernel/dmabuf/buffers/. This directory is used to collect the dmabuf stats and it is created through dma_buf_stats_setup(). At current, failure to create this directory entry can make the dma_buf_export() to fail. Now, as the get_next_ino() can definitely give a repetitive inode no causing the directory entry creation to fail with -EEXIST. This is a problem on the systems where dmabuf stats functionality is enabled on the production builds can make the dma_buf_export(), though the dmabuf memory is allocated successfully, to fail just because it couldn't create stats entry. This issue we are able to see on the snapdragon system within 13 days where there already exists a directory with inode no "122602" so dma_buf_stats_setup() failed with -EEXIST as it is trying to create the same directory entry. To make the dentry name as unique, use the dmabuf fs specific inode which is based on the simple atomic variable increment. There is tmpfs subsystem too which relies on its own inode generation rather than relying on the get_next_ino() for the same reason of avoiding the duplicate inodes[1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=e809d5f0b5c912fe981dce738f3283b2010665f0 Signed-off-by: Charan Teja Kalla Cc: # 5.15.x+ Reviewed-by: Greg Kroah-Hartman Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1652441296-1986-1-git-send-email-quic_charante@quicinc.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-buf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index a1f09437b2b41..f9217e300eea9 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -436,6 +436,7 @@ static inline int is_dma_buf_file(struct file *file) static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) { + static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); @@ -445,6 +446,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) inode->i_size = dmabuf->size; inode_set_bytes(inode, dmabuf->size); + /* + * The ->i_ino acquired from get_next_ino() is not unique thus + * not suitable for using it as dentry name by dmabuf stats. + * Override ->i_ino with the unique and dmabuffs specific + * value. + */ + inode->i_ino = atomic64_add_return(1, &dmabuf_inode); file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", flags, &dma_buf_fops); if (IS_ERR(file)) -- GitLab From b29774c208a3f29e6c9e5790752ec79f563ac9ee Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:26 -0700 Subject: [PATCH 0062/4335] ARM: dts: aspeed-g6: remove FWQSPID group in pinctrl dtsi [ Upstream commit efddaa397cceefb61476e383c26fafd1f8ab6356 ] FWSPIDQ2 and FWSPIDQ3 are not part of FWSPI18 interface so remove FWQSPID group in pinctrl dtsi. These pins must be used with the FWSPI pins that are dedicated for boot SPI interface which provides same 3.3v logic level. Fixes: 2f6edb6bcb2f ("ARM: dts: aspeed: Fix AST2600 quad spi group") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-2-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index e4775bbceecc6..06d60a8540e92 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; -- GitLab From f234feed77d2911ad195603bf023398dbb38c2d3 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:27 -0700 Subject: [PATCH 0063/4335] pinctrl: pinctrl-aspeed-g6: remove FWQSPID group in pinctrl [ Upstream commit 3eef2f48ba0933ba995529f522554ad5c276c39b ] FWSPIDQ2 and FWSPIDQ3 are not part of FWSPI18 interface so remove FWQSPID group in pinctrl. These pins must be used with the FWSPI pins that are dedicated for boot SPI interface which provides same 3.3v logic level. Fixes: 2eda1cdec49f ("pinctrl: aspeed: Add AST2600 pinmux support") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-3-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index a3fa03bcd9a30..54064714d73fb 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1236,18 +1236,12 @@ FUNC_GROUP_DECL(SALT8, AA12); FUNC_GROUP_DECL(WDTRST4, AA12); #define AE12 196 -SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 4)); SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4); -PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2), - SIG_EXPR_LIST_PTR(AE12, GPIOY4)); +PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, GPIOY4)); #define AF12 197 -SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 5)); SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5); -PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3), - SIG_EXPR_LIST_PTR(AF12, GPIOY5)); +PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, GPIOY5)); #define AC12 198 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6)); @@ -1520,9 +1514,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3)); PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7); GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4); -GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12); GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4); -FUNC_DECL_2(FWSPID, FWSPID, FWQSPID); +FUNC_DECL_1(FWSPID, FWSPID); FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4); FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8); /* @@ -1918,7 +1911,6 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(FSI2), ASPEED_PINCTRL_GROUP(FWSPIABR), ASPEED_PINCTRL_GROUP(FWSPID), - ASPEED_PINCTRL_GROUP(FWQSPID), ASPEED_PINCTRL_GROUP(FWSPIWP), ASPEED_PINCTRL_GROUP(GPIT0), ASPEED_PINCTRL_GROUP(GPIT1), -- GitLab From 75e9562bb4bf81a7943679add020fca161c1cfac Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:32 -0700 Subject: [PATCH 0064/4335] ARM: dts: aspeed-g6: fix SPI1/SPI2 quad pin group [ Upstream commit 890362d41b244536ab63591f813393f5fdf59ed7 ] Fix incorrect function mappings in pinctrl_qspi1_default and pinctrl_qspi2_default since their function should be SPI1 and SPI2 respectively. Fixes: f510f04c8c83 ("ARM: dts: aspeed: Add AST2600 pinmux nodes") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-8-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 06d60a8540e92..ac07c240419a2 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -648,12 +648,12 @@ }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; -- GitLab From 794f7da40d652607f190fac73488ba4c835fa732 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 16 Sep 2021 16:00:45 -0500 Subject: [PATCH 0065/4335] ARM: dts: aspeed: Add ADC for AST2600 and enable for Rainier and Everest [ Upstream commit eaad40466bd715c4b342ac9f7c889f5281714feb ] Add the ADC nodes to the AST2600 devicetree. Enable ADC1 for Rainier and Everest systems and add an iio-hwmon node for the 7th channel to report the battery voltage. Tested on Rainier: ~# cat /sys/class/hwmon/hwmon11/in1_input 1347 Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20210916210045.31769-1-eajames@linux.ibm.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 15 +++++++++++++++ arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts | 15 +++++++++++++++ arch/arm/boot/dts/aspeed-g6.dtsi | 20 ++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts index 2efd70666738c..af7ea7cab8cfa 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts @@ -231,6 +231,21 @@ gpios = <&gpio0 ASPEED_GPIO(P, 4) GPIO_ACTIVE_LOW>; }; }; + + iio-hwmon { + compatible = "iio-hwmon"; + io-channels = <&adc1 7>; + }; +}; + +&adc1 { + status = "okay"; + aspeed,int-vref-microvolt = <2500000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_adc8_default &pinctrl_adc9_default + &pinctrl_adc10_default &pinctrl_adc11_default + &pinctrl_adc12_default &pinctrl_adc13_default + &pinctrl_adc14_default &pinctrl_adc15_default>; }; &gpio0 { diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts index 6419c9762c0b6..6c9f34396a3ae 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts @@ -246,6 +246,21 @@ linux,code = <11>; }; }; + + iio-hwmon { + compatible = "iio-hwmon"; + io-channels = <&adc1 7>; + }; +}; + +&adc1 { + status = "okay"; + aspeed,int-vref-microvolt = <2500000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_adc8_default &pinctrl_adc9_default + &pinctrl_adc10_default &pinctrl_adc11_default + &pinctrl_adc12_default &pinctrl_adc13_default + &pinctrl_adc14_default &pinctrl_adc15_default>; }; &ehci1 { diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 1b47be1704f83..ee171b3364fa0 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -364,6 +364,26 @@ status = "disabled"; }; + adc0: adc@1e6e9000 { + compatible = "aspeed,ast2600-adc0"; + reg = <0x1e6e9000 0x100>; + clocks = <&syscon ASPEED_CLK_APB2>; + resets = <&syscon ASPEED_RESET_ADC>; + interrupts = ; + #io-channel-cells = <1>; + status = "disabled"; + }; + + adc1: adc@1e6e9100 { + compatible = "aspeed,ast2600-adc1"; + reg = <0x1e6e9100 0x100>; + clocks = <&syscon ASPEED_CLK_APB2>; + resets = <&syscon ASPEED_RESET_ADC>; + interrupts = ; + #io-channel-cells = <1>; + status = "disabled"; + }; + gpio0: gpio@1e780000 { #gpio-cells = <2>; gpio-controller; -- GitLab From ed978be2afb5f467be1c14b0316c6a53787e300f Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 17 Nov 2021 11:51:05 +0800 Subject: [PATCH 0066/4335] ARM: dts: aspeed: Add secure boot controller node [ Upstream commit fea289467608ffddb2f8d3a740912047974bb183 ] The ast2600 has a secure boot controller. Signed-off-by: Joel Stanley Reviewed-by: Ryan Chen Link: https://lore.kernel.org/r/20211117035106.321454-3-joel@jms.id.au Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index ee171b3364fa0..8f947ed47fc5f 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -384,6 +384,11 @@ status = "disabled"; }; + sbc: secure-boot-controller@1e6f2000 { + compatible = "aspeed,ast2600-sbc"; + reg = <0x1e6f2000 0x1000>; + }; + gpio0: gpio@1e780000 { #gpio-cells = <2>; gpio-controller; -- GitLab From 53bac31c8d8ace1fcabe51ca75f4bfee8d6e37ff Mon Sep 17 00:00:00 2001 From: Howard Chiu Date: Tue, 29 Mar 2022 03:23:51 +0000 Subject: [PATCH 0067/4335] ARM: dts: aspeed: Add video engine to g6 [ Upstream commit 32e62d1beab70d485980013312e747a25c4e13f7 ] This node was accidentally removed by commit 645afe73f951 ("ARM: dts: aspeed: ast2600: Update XDMA engine node"). Fixes: 645afe73f951 ("ARM: dts: aspeed: ast2600: Update XDMA engine node") Signed-off-by: Howard Chiu Link: https://lore.kernel.org/r/SG2PR06MB2315C57600A0132FEF40F21EE61E9@SG2PR06MB2315.apcprd06.prod.outlook.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 8f947ed47fc5f..e5724b1a2e20d 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -389,6 +389,16 @@ reg = <0x1e6f2000 0x1000>; }; + video: video@1e700000 { + compatible = "aspeed,ast2600-video-engine"; + reg = <0x1e700000 0x1000>; + clocks = <&syscon ASPEED_CLK_GATE_VCLK>, + <&syscon ASPEED_CLK_GATE_ECLK>; + clock-names = "vclk", "eclk"; + interrupts = ; + status = "disabled"; + }; + gpio0: gpio@1e780000 { #gpio-cells = <2>; gpio-controller; -- GitLab From 84b6e3d58955176593be451bc9bbdf6b599fa327 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Tue, 26 Apr 2022 14:57:14 +0200 Subject: [PATCH 0068/4335] pinctrl: mediatek: mt8365: fix IES control pins [ Upstream commit f680058f406863b55ac226d1c157701939c63db4 ] IES26 (BIT 16 of IES1_CFG_ADDR) controls the following pads: - PAD_I2S_DATA_IN (GPIO114) - PAD_I2S_LRCK (GPIO115) - PAD_I2S_BCK (GPIO116) The pinctrl table is wrong since it lists pins 114 to 112. Update the table with the correct values. Fixes: e94d8b6fb83a ("pinctrl: mediatek: add support for mt8365 SoC") Reported-by: Youngmin Han Signed-off-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20220426125714.298907-1-mkorpershoek@baylibre.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8365.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c index 79b1fee5a1eba..ddee0db72d264 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c @@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = { MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13), MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14), MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15), - MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16), + MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16), MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17), MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18), MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19), -- GitLab From 636184fd1bd424281d0d147c623f636415151587 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Apr 2022 12:33:18 -0700 Subject: [PATCH 0069/4335] ALSA: hda - fix unused Realtek function when PM is not enabled [ Upstream commit c3d9ca93f1e3bd3d1adfc4479a12c82fed424c87 ] When CONFIG_PM is not enabled, alc_shutup() is not needed, so move it inside the #ifdef CONFIG_PM guard. Also drop some contiguous #endif / #ifdef CONFIG_PM for simplicity. Fixes this build warning: sound/pci/hda/patch_realtek.c:886:20: warning: unused function 'alc_shutup' Fixes: 08c189f2c552 ("ALSA: hda - Use generic parser codes for Realtek driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220430193318.29024-1-rdunlap@infradead.org Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1880e30341a07..040825ea9a089 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -932,6 +932,9 @@ static int alc_init(struct hda_codec *codec) return 0; } +#define alc_free snd_hda_gen_free + +#ifdef CONFIG_PM static inline void alc_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -945,9 +948,6 @@ static inline void alc_shutup(struct hda_codec *codec) alc_shutup_pins(codec); } -#define alc_free snd_hda_gen_free - -#ifdef CONFIG_PM static void alc_power_eapd(struct hda_codec *codec) { alc_auto_setup_eapd(codec, false); @@ -961,9 +961,7 @@ static int alc_suspend(struct hda_codec *codec) spec->power_hook(codec); return 0; } -#endif -#ifdef CONFIG_PM static int alc_resume(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; -- GitLab From 999ee216c65e8cb91c2b8a9ae203a18ca2a28671 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 May 2022 10:10:32 -0500 Subject: [PATCH 0070/4335] net: ipa: record proper RX transaction count [ Upstream commit d8290cbe1111105f92f0c8ab455bec8bf98d0630 ] Each time we are notified that some number of transactions on an RX channel has completed, we record the number of bytes that have been transferred since the previous notification. We also track the number of transactions completed, but that is not currently being calculated correctly; we're currently counting the number of such notifications, but each notification can represent many transaction completions. Fix this. Fixes: 650d1603825d8 ("soc: qcom: ipa: the generic software interface") Signed-off-by: Alex Elder Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ipa/gsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index a2fcdb1abdb96..a734e5576729e 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1370,9 +1370,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) struct gsi_event *event_done; struct gsi_event *event; struct gsi_trans *trans; + u32 trans_count = 0; u32 byte_count = 0; - u32 old_index; u32 event_avail; + u32 old_index; trans_info = &channel->trans_info; @@ -1393,6 +1394,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) do { trans->len = __le16_to_cpu(event->len); byte_count += trans->len; + trans_count++; /* Move on to the next event and transaction */ if (--event_avail) @@ -1404,7 +1406,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) /* We record RX bytes when they are received */ channel->byte_count += byte_count; - channel->trans_count++; + channel->trans_count += trans_count; } /* Initialize a ring, including allocating DMA memory for its entries */ -- GitLab From ad7491e92b715e06dfb9b23f243fe9f339cee626 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 12 May 2022 22:49:00 +0530 Subject: [PATCH 0071/4335] net: macb: Increment rx bd head after allocating skb and buffer [ Upstream commit 9500acc631dbb8b73166e25700e656b11f6007b6 ] In gem_rx_refill rx_prepared_head is incremented at the beginning of the while loop preparing the skb and data buffers. If the skb or data buffer allocation fails, this BD will be unusable BDs until the head loops back to the same BD (and obviously buffer allocation succeeds). In the unlikely event that there's a string of allocation failures, there will be an equal number of unusable BDs and an inconsistent RX BD chain. Hence increment the head at the end of the while loop to be clean. Fixes: 4df95131ea80 ("net/macb: change RX path for GEM") Signed-off-by: Harini Katakam Signed-off-by: Michal Simek Signed-off-by: Radhey Shyam Pandey Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220512171900.32593-1-harini.katakam@xilinx.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 217c1a0f8940b..2fd3dd4b8b815 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1250,7 +1250,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); - queue->rx_prepared_head++; desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { @@ -1289,6 +1288,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } + queue->rx_prepared_head++; } /* Make descriptor updates visible to hardware */ -- GitLab From 36d8cca5b46fe41b59f8011553495ede3b693703 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 14 Mar 2022 11:38:22 +0100 Subject: [PATCH 0072/4335] xfrm: rework default policy structure [ Upstream commit b58b1f563ab78955d37e9e43e02790a85c66ac05 ] This is a follow up of commit f8d858e607b2 ("xfrm: make user policy API complete"). The goal is to align userland API to the internal structures. Signed-off-by: Nicolas Dichtel Reviewed-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/net/netns/xfrm.h | 6 +---- include/net/xfrm.h | 48 +++++++++++++++------------------------- net/xfrm/xfrm_policy.c | 10 ++++++--- net/xfrm/xfrm_user.c | 43 +++++++++++++++-------------------- 4 files changed, 44 insertions(+), 63 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 947733a639a6f..bd7c3be4af5d7 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -66,11 +66,7 @@ struct netns_xfrm { int sysctl_larval_drop; u32 sysctl_acq_expires; - u8 policy_default; -#define XFRM_POL_DEFAULT_IN 1 -#define XFRM_POL_DEFAULT_OUT 2 -#define XFRM_POL_DEFAULT_FWD 4 -#define XFRM_POL_DEFAULT_MASK 7 + u8 policy_default[XFRM_POLICY_MAX]; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 358dfe6fefefc..e03f0f8822262 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1080,25 +1080,18 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM -static inline bool -xfrm_default_allow(struct net *net, int dir) -{ - u8 def = net->xfrm.policy_default; - - switch (dir) { - case XFRM_POLICY_IN: - return def & XFRM_POL_DEFAULT_IN ? false : true; - case XFRM_POLICY_OUT: - return def & XFRM_POL_DEFAULT_OUT ? false : true; - case XFRM_POLICY_FWD: - return def & XFRM_POL_DEFAULT_FWD ? false : true; - } - return false; -} - int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family); +static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, + int dir) +{ + if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) + return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; + + return false; +} + static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) @@ -1109,13 +1102,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - if (xfrm_default_allow(net, dir)) - return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); - else - return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); + return __xfrm_check_nopolicy(net, skb, dir) || + (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) @@ -1167,13 +1156,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_OUT)) - return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); - else - return (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); + if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && + net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) + return true; + + return (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 02099d113a0a5..a6271b955e11d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3160,7 +3160,7 @@ ok: nopol: if (!(dst_orig->dev->flags & IFF_LOOPBACK) && - !xfrm_default_allow(net, dir)) { + net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { err = -EPERM; goto error; } @@ -3572,7 +3572,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) { - if (!xfrm_default_allow(net, dir)) { + if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } @@ -3632,7 +3632,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_nr = ti; - if (!xfrm_default_allow(net, dir) && !xfrm_nr) { + if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK && + !xfrm_nr) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); goto reject; } @@ -4121,6 +4122,9 @@ static int __net_init xfrm_net_init(struct net *net) spin_lock_init(&net->xfrm.xfrm_policy_lock); seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); + net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT; + net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT; + net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT; rv = xfrm_statistics_init(net); if (rv < 0) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2acba159327cb..5fba82757ce5e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1993,12 +1993,9 @@ static int xfrm_notify_userpolicy(struct net *net) } up = nlmsg_data(nlh); - up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; - up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; - up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + up->in = net->xfrm.policy_default[XFRM_POLICY_IN]; + up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD]; + up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(skb, nlh); @@ -2009,26 +2006,26 @@ static int xfrm_notify_userpolicy(struct net *net) return err; } +static bool xfrm_userpolicy_is_valid(__u8 policy) +{ + return policy == XFRM_USERPOLICY_BLOCK || + policy == XFRM_USERPOLICY_ACCEPT; +} + static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *up = nlmsg_data(nlh); - if (up->in == XFRM_USERPOLICY_BLOCK) - net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN; - else if (up->in == XFRM_USERPOLICY_ACCEPT) - net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN; + if (xfrm_userpolicy_is_valid(up->in)) + net->xfrm.policy_default[XFRM_POLICY_IN] = up->in; - if (up->fwd == XFRM_USERPOLICY_BLOCK) - net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD; - else if (up->fwd == XFRM_USERPOLICY_ACCEPT) - net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD; + if (xfrm_userpolicy_is_valid(up->fwd)) + net->xfrm.policy_default[XFRM_POLICY_FWD] = up->fwd; - if (up->out == XFRM_USERPOLICY_BLOCK) - net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT; - else if (up->out == XFRM_USERPOLICY_ACCEPT) - net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT; + if (xfrm_userpolicy_is_valid(up->out)) + net->xfrm.policy_default[XFRM_POLICY_OUT] = up->out; rt_genid_bump_all(net); @@ -2058,13 +2055,9 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, } r_up = nlmsg_data(r_nlh); - - r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; - r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; - r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ? - XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + r_up->in = net->xfrm.policy_default[XFRM_POLICY_IN]; + r_up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD]; + r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); -- GitLab From 952c2464963895271c31698970e7ec1ad6f0fe45 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 13 May 2022 23:34:02 +0300 Subject: [PATCH 0073/4335] xfrm: fix "disable_policy" flag use when arriving from different devices [ Upstream commit e6175a2ed1f18bf2f649625bf725e07adcfa6a28 ] In IPv4 setting the "disable_policy" flag on a device means no policy should be enforced for traffic originating from the device. This was implemented by seting the DST_NOPOLICY flag in the dst based on the originating device. However, dsts are cached in nexthops regardless of the originating devices, in which case, the DST_NOPOLICY flag value may be incorrect. Consider the following setup: +------------------------------+ | ROUTER | +-------------+ | +-----------------+ | | ipsec src |----|-|ipsec0 | | +-------------+ | |disable_policy=0 | +----+ | | +-----------------+ |eth1|-|----- +-------------+ | +-----------------+ +----+ | | noipsec src |----|-|eth0 | | +-------------+ | |disable_policy=1 | | | +-----------------+ | +------------------------------+ Where ROUTER has a default route towards eth1. dst entries for traffic arriving from eth0 would have DST_NOPOLICY and would be cached and therefore can be reused by traffic originating from ipsec0, skipping policy check. Fix by setting a IPSKB_NOPOLICY flag in IPCB and observing it instead of the DST in IN/FWD IPv4 policy checks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Shmulik Ladkani Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/net/ip.h | 1 + include/net/xfrm.h | 14 +++++++++++++- net/ipv4/route.c | 23 ++++++++++++++++++----- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 0106c6590ee7b..a77a9e1c6c047 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -55,6 +55,7 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) +#define IPSKB_NOPOLICY BIT(8) u16 frag_max_size; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e03f0f8822262..65242172e41c2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1092,6 +1092,18 @@ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, return false; } +static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, + int dir, unsigned short family) +{ + if (dir != XFRM_POLICY_OUT && family == AF_INET) { + /* same dst may be used for traffic originating from + * devices with different policy settings. + */ + return IPCB(skb)->flags & IPSKB_NOPOLICY; + } + return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); +} + static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) @@ -1103,7 +1115,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, return __xfrm_policy_check(sk, ndir, skb, family); return __xfrm_check_nopolicy(net, skb, dir) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_check_dev_nopolicy(skb, dir, family) || __xfrm_policy_check(sk, ndir, skb, family); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e8020a3bd677..1db2fda228306 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1727,6 +1727,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; struct rtable *rth; + bool no_policy; u32 itag = 0; int err; @@ -1737,8 +1738,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (our) flags |= RTCF_LOCAL; + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) return -ENOBUFS; @@ -1797,7 +1802,7 @@ static int __mkroute_input(struct sk_buff *skb, struct rtable *rth; int err; struct in_device *out_dev; - bool do_cache; + bool do_cache, no_policy; u32 itag = 0; /* get a working reference to the output device */ @@ -1842,6 +1847,10 @@ static int __mkroute_input(struct sk_buff *skb, } } + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) @@ -1854,8 +1863,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), + rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; @@ -2230,6 +2238,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable *rth; struct flowi4 fl4; bool do_cache = true; + bool no_policy; /* IP on this device is disabled. */ @@ -2347,6 +2356,10 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: + no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY); + if (no_policy) + IPCB(skb)->flags |= IPSKB_NOPOLICY; + do_cache &= res->fi && !itag; if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); @@ -2361,7 +2374,7 @@ local_input: rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, - IN_DEV_ORCONF(in_dev, NOPOLICY), false); + no_policy, false); if (!rth) goto e_nobufs; -- GitLab From c22ee3a0674ca15dfc6dab625b8bc095d1c3b217 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 May 2022 11:27:06 +0200 Subject: [PATCH 0074/4335] net/sched: act_pedit: sanitize shift argument before usage [ Upstream commit 4d42d54a7d6aa6d29221d3fd4f2ae9503e94f011 ] syzbot was able to trigger an Out-of-Bound on the pedit action: UBSAN: shift-out-of-bounds in net/sched/act_pedit.c:238:43 shift exponent 1400735974 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 3606 Comm: syz-executor151 Not tainted 5.18.0-rc5-syzkaller-00165-g810c2f0a3f86 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 ubsan_epilogue+0xb/0x50 lib/ubsan.c:151 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x187 lib/ubsan.c:322 tcf_pedit_init.cold+0x1a/0x1f net/sched/act_pedit.c:238 tcf_action_init_1+0x414/0x690 net/sched/act_api.c:1367 tcf_action_init+0x530/0x8d0 net/sched/act_api.c:1432 tcf_action_add+0xf9/0x480 net/sched/act_api.c:1956 tc_ctl_action+0x346/0x470 net/sched/act_api.c:2015 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5993 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fe36e9e1b59 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffef796fe88 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe36e9e1b59 RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003 RBP: 00007fe36e9a5d00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe36e9a5d90 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The 'shift' field is not validated, and any value above 31 will trigger out-of-bounds. The issue predates the git history, but syzbot was able to trigger it only after the commit mentioned in the fixes tag, and this change only applies on top of such commit. Address the issue bounding the 'shift' value to the maximum allowed by the relevant operator. Reported-and-tested-by: syzbot+8ed8fc4c57e9dcf23ca6@syzkaller.appspotmail.com Fixes: 8b796475fd78 ("net/sched: act_pedit: really ensure the skb is writable") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_pedit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index cfadd613644a5..1262a84b725fc 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -232,6 +232,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, for (i = 0; i < p->tcfp_nkeys; ++i) { u32 cur = p->tcfp_keys[i].off; + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + /* The AT option can read a single byte, we can bound the actual * value with uchar max. */ -- GitLab From 5f4197a020c049a59ea7907c31f9ab037dcefefe Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:13 +0200 Subject: [PATCH 0075/4335] netfilter: flowtable: fix excessive hw offload attempts after failure [ Upstream commit 396ef64113a8ba01c46315d67a99db8dde3eef51 ] If a flow cannot be offloaded, the code currently repeatedly tries again as quickly as possible, which can significantly increase system load. Fix this by limiting flow timeout update and hardware offload retry to once per second. Fixes: c07531c01d82 ("netfilter: flowtable: Remove redundant hw refresh bit") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b90eca7a2f22b..52e7f94d2450d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -329,8 +329,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (READ_ONCE(flow->timeout) != timeout) + if (timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); + else + return; if (likely(!nf_flowtable_hw_offload(flow_table))) return; -- GitLab From 7613dcaceee281973145588f4244f2f78ef85b7f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:14 +0200 Subject: [PATCH 0076/4335] netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices [ Upstream commit 45ca3e61999e9a30ca2b7cfbf9da8a9f8d13be31 ] The dst entry does not contain a valid hardware address, so skip the lookup in order to avoid running into errors here. The proper hardware address is filled in from nft_dev_path_info Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_flow_offload.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 0af34ad414796..dd824193c9208 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route, route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); } +static bool nft_is_valid_ether_device(const struct net_device *dev) +{ + if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || + dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) + return false; + + return true; +} + static int nft_dev_fill_forward_path(const struct nf_flow_route *route, const struct dst_entry *dst_cache, const struct nf_conn *ct, @@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, struct neighbour *n; u8 nud_state; + if (!nft_is_valid_ether_device(dev)) + goto out; + n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -1; @@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, if (!(nud_state & NUD_VALID)) return -1; +out: return dev_fill_forward_path(dev, ha, stack); } @@ -78,15 +91,6 @@ struct nft_forward_info { enum flow_offload_xmit_type xmit_type; }; -static bool nft_is_valid_ether_device(const struct net_device *dev) -{ - if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || - dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) - return false; - - return true; -} - static void nft_dev_path_info(const struct net_device_path_stack *stack, struct nft_forward_info *info, unsigned char *ha, struct nf_flowtable *flowtable) -- GitLab From f96b2e06721249ebf8da3254cfef29dcb6583948 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:15 +0200 Subject: [PATCH 0077/4335] net: fix dev_fill_forward_path with pppoe + bridge [ Upstream commit cf2df74e202d81b09f09d84c2d8903e0e87e9274 ] When calling dev_fill_forward_path on a pppoe device, the provided destination address is invalid. In order for the bridge fdb lookup to succeed, the pppoe code needs to update ctx->daddr to the correct value. Fix this by storing the address inside struct net_device_path_ctx Fixes: f6efc675c9dd ("net: ppp: resolve forwarding path for bridge pppoe devices") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- drivers/net/ppp/pppoe.c | 1 + include/linux/netdevice.h | 2 +- net/core/dev.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 3619520340b74..e172743948ed7 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); + memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 62ff094677762..39f1893ecac03 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -887,7 +887,7 @@ struct net_device_path_stack { struct net_device_path_ctx { const struct net_device *dev; - const u8 *daddr; + u8 daddr[ETH_ALEN]; int num_vlans; struct { diff --git a/net/core/dev.c b/net/core/dev.c index 804aba2228c25..5907212c00f37 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -741,11 +741,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, const struct net_device *last_dev; struct net_device_path_ctx ctx = { .dev = dev, - .daddr = daddr, }; struct net_device_path *path; int ret = 0; + memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); stack->num_paths = 0; while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { last_dev = ctx.dev; -- GitLab From b329889974aed47e1167c85653c07097013e01a7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 May 2022 14:26:16 +0200 Subject: [PATCH 0078/4335] netfilter: nft_flow_offload: fix offload with pppoe + vlan [ Upstream commit 2456074935003b66c40f78df6adfc722435d43ea ] When running a combination of PPPoE on top of a VLAN, we need to set info->outdev to the PPPoE device, otherwise PPPoE encap is skipped during software offload. Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_flow_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index dd824193c9208..12145a80ef035 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -123,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, info->indev = NULL; break; } - info->outdev = path->dev; + if (!info->outdev) + info->outdev = path->dev; info->encap[info->num_encaps].id = path->encap.id; info->encap[info->num_encaps].proto = path->encap.proto; info->num_encaps++; -- GitLab From eb0ebbd4b0d4e38d958b8e49bc479711b7e796f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 15 May 2022 14:58:15 +0200 Subject: [PATCH 0079/4335] Revert "PCI: aardvark: Rewrite IRQ code to chained IRQ handler" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a3b69dd0ad6265c29c4b6fb381cd76fb3bebdf8c ] This reverts commit 1571d67dc190e50c6c56e8f88cdc39f7cc53166e. This commit broke support for setting interrupt affinity. It looks like that it is related to the chained IRQ handler. Revert this commit until issue with setting interrupt affinity is fixed. Fixes: 1571d67dc190 ("PCI: aardvark: Rewrite IRQ code to chained IRQ handler") Link: https://lore.kernel.org/r/20220515125815.30157-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/controller/pci-aardvark.c | 48 ++++++++++++--------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index ff45052cf48de..7cc2c54daad03 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -272,7 +272,6 @@ struct advk_pcie { u32 actions; } wins[OB_WIN_COUNT]; u8 wins_count; - int irq; struct irq_domain *rp_irq_domain; struct irq_domain *irq_domain; struct irq_chip irq_chip; @@ -1572,26 +1571,21 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie) } } -static void advk_pcie_irq_handler(struct irq_desc *desc) +static irqreturn_t advk_pcie_irq_handler(int irq, void *arg) { - struct advk_pcie *pcie = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - u32 val, mask, status; + struct advk_pcie *pcie = arg; + u32 status; - chained_irq_enter(chip, desc); + status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG); + if (!(status & PCIE_IRQ_CORE_INT)) + return IRQ_NONE; - val = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG); - mask = advk_readl(pcie, HOST_CTRL_INT_MASK_REG); - status = val & ((~mask) & PCIE_IRQ_ALL_MASK); + advk_pcie_handle_int(pcie); - if (status & PCIE_IRQ_CORE_INT) { - advk_pcie_handle_int(pcie); + /* Clear interrupt */ + advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG); - /* Clear interrupt */ - advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG); - } - - chained_irq_exit(chip, desc); + return IRQ_HANDLED; } static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) @@ -1673,7 +1667,7 @@ static int advk_pcie_probe(struct platform_device *pdev) struct advk_pcie *pcie; struct pci_host_bridge *bridge; struct resource_entry *entry; - int ret; + int ret, irq; bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie)); if (!bridge) @@ -1759,9 +1753,17 @@ static int advk_pcie_probe(struct platform_device *pdev) if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); - pcie->irq = platform_get_irq(pdev, 0); - if (pcie->irq < 0) - return pcie->irq; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, advk_pcie_irq_handler, + IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie", + pcie); + if (ret) { + dev_err(dev, "Failed to register interrupt\n"); + return ret; + } pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node, "reset-gpios", 0, @@ -1818,15 +1820,12 @@ static int advk_pcie_probe(struct platform_device *pdev) return ret; } - irq_set_chained_handler_and_data(pcie->irq, advk_pcie_irq_handler, pcie); - bridge->sysdata = pcie; bridge->ops = &advk_pcie_ops; bridge->map_irq = advk_pcie_map_irq; ret = pci_host_probe(bridge); if (ret < 0) { - irq_set_chained_handler_and_data(pcie->irq, NULL, NULL); advk_pcie_remove_rp_irq_domain(pcie); advk_pcie_remove_msi_irq_domain(pcie); advk_pcie_remove_irq_domain(pcie); @@ -1875,9 +1874,6 @@ static int advk_pcie_remove(struct platform_device *pdev) advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG); advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG); - /* Remove IRQ handler */ - irq_set_chained_handler_and_data(pcie->irq, NULL, NULL); - /* Remove IRQ domains */ advk_pcie_remove_rp_irq_domain(pcie); advk_pcie_remove_msi_irq_domain(pcie); -- GitLab From 2c560e90086f8f0761ace9546ecd78a2f9b50907 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 May 2022 19:01:56 +0200 Subject: [PATCH 0080/4335] net: systemport: Fix an error handling path in bcm_sysport_probe() [ Upstream commit ef6b1cd11962aec21c58d137006ab122dbc8d6fd ] if devm_clk_get_optional() fails, we still need to go through the error handling path. Add the missing goto. Fixes: 6328a126896ea ("net: systemport: Manage Wake-on-LAN clock") Signed-off-by: Christophe JAILLET Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/99d70634a81c229885ae9e4ee69b2035749f7edc.1652634040.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0877b3d7f88c5..ae541a9d1eeed 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2585,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, 1); priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol"); - if (IS_ERR(priv->wol_clk)) - return PTR_ERR(priv->wol_clk); + if (IS_ERR(priv->wol_clk)) { + ret = PTR_ERR(priv->wol_clk); + goto err_deregister_fixed_link; + } /* Set the needed headroom once and for all */ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); -- GitLab From 4ad09fdef55b70f16f8d385981b864ac75cf1354 Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:06:56 +0800 Subject: [PATCH 0081/4335] net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf() [ Upstream commit 9e7fef9521e73ca8afd7da9e58c14654b02dfad8 ] In vmxnet3_rq_alloc_rx_buf(), when dma_map_single() fails, rbi->skb is freed immediately. Similarly, in another branch, when dma_map_page() fails, rbi->page is also freed. In the two cases, vmxnet3_rq_alloc_rx_buf() returns an error to its callers vmxnet3_rq_init() -> vmxnet3_rq_init_all() -> vmxnet3_activate_dev(). Then vmxnet3_activate_dev() calls vmxnet3_rq_cleanup_all() in error handling code, and rbi->skb or rbi->page are freed again in vmxnet3_rq_cleanup_all(), causing use-after-free bugs. To fix these possible bugs, rbi->skb and rbi->page should be cleared after they are freed. The error log in our fault-injection testing is shown as follows: [ 14.319016] BUG: KASAN: use-after-free in consume_skb+0x2f/0x150 ... [ 14.321586] Call Trace: ... [ 14.325357] consume_skb+0x2f/0x150 [ 14.325671] vmxnet3_rq_cleanup_all+0x33a/0x4e0 [vmxnet3] [ 14.326150] vmxnet3_activate_dev+0xb9d/0x2ca0 [vmxnet3] [ 14.326616] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.361675] Allocated by task 351: ... [ 14.362688] __netdev_alloc_skb+0x1b3/0x6f0 [ 14.362960] vmxnet3_rq_alloc_rx_buf+0x1b0/0x8d0 [vmxnet3] [ 14.363317] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.363661] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.367309] [ 14.367412] Freed by task 351: ... [ 14.368932] __dev_kfree_skb_any+0xd2/0xe0 [ 14.369193] vmxnet3_rq_alloc_rx_buf+0x71e/0x8d0 [vmxnet3] [ 14.369544] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.369883] vmxnet3_open+0x387/0x470 [vmxnet3] [ 14.370174] __dev_open+0x28a/0x420 [ 14.370399] __dev_change_flags+0x192/0x590 [ 14.370667] dev_change_flags+0x7a/0x180 [ 14.370919] do_setlink+0xb28/0x3570 [ 14.371150] rtnl_newlink+0x1160/0x1740 [ 14.371399] rtnetlink_rcv_msg+0x5bf/0xa50 [ 14.371661] netlink_rcv_skb+0x1cd/0x3e0 [ 14.371913] netlink_unicast+0x5dc/0x840 [ 14.372169] netlink_sendmsg+0x856/0xc40 [ 14.372420] ____sys_sendmsg+0x8a7/0x8d0 [ 14.372673] __sys_sendmsg+0x1c2/0x270 [ 14.372914] do_syscall_64+0x41/0x90 [ 14.373145] entry_SYSCALL_64_after_hwframe+0x44/0xae ... Fixes: 5738a09d58d5a ("vmxnet3: fix checks for dma mapping errors") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050656.2636588-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 5b0215b7c1761..8ab86bbdbf5ee 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } -- GitLab From e35387a91318ccdec4a30b58d967391e011e34fa Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:07:11 +0800 Subject: [PATCH 0082/4335] net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup() [ Upstream commit edf410cb74dc612fd47ef5be319c5a0bcd6e6ccd ] In vmxnet3_rq_create(), when dma_alloc_coherent() fails, vmxnet3_rq_destroy() is called. It sets rq->rx_ring[i].base to NULL. Then vmxnet3_rq_create() returns an error to its callers mxnet3_rq_create_all() -> vmxnet3_change_mtu(). Then vmxnet3_change_mtu() calls vmxnet3_force_close() -> dev_close() in error handling code. And the driver calls vmxnet3_close() -> vmxnet3_quiesce_dev() -> vmxnet3_rq_cleanup_all() -> vmxnet3_rq_cleanup(). In vmxnet3_rq_cleanup(), rq->rx_ring[ring_idx].base is accessed, but this variable is NULL, causing a NULL pointer dereference. To fix this possible bug, an if statement is added to check whether rq->rx_ring[0].base is NULL in vmxnet3_rq_cleanup() and exit early if so. The error log in our fault-injection testing is shown as follows: [ 65.220135] BUG: kernel NULL pointer dereference, address: 0000000000000008 ... [ 65.222633] RIP: 0010:vmxnet3_rq_cleanup_all+0x396/0x4e0 [vmxnet3] ... [ 65.227977] Call Trace: ... [ 65.228262] vmxnet3_quiesce_dev+0x80f/0x8a0 [vmxnet3] [ 65.228580] vmxnet3_close+0x2c4/0x3f0 [vmxnet3] [ 65.228866] __dev_close_many+0x288/0x350 [ 65.229607] dev_close_many+0xa4/0x480 [ 65.231124] dev_close+0x138/0x230 [ 65.231933] vmxnet3_force_close+0x1f0/0x240 [vmxnet3] [ 65.232248] vmxnet3_change_mtu+0x75d/0x920 [vmxnet3] ... Fixes: d1a890fa37f27 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050711.2636709-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 8ab86bbdbf5ee..bc3192cf48e3e 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1668,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD -- GitLab From 12fd5b1121177f70745cdd97f2326d020d1c8e25 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 28 Apr 2022 10:33:50 +0200 Subject: [PATCH 0083/4335] ice: fix crash when writing timestamp on RX rings [ Upstream commit 4503cc7fdf9a84cd631b0cb8ecb3c9b1bdbf3594 ] Do not allow to write timestamps on RX rings if PF is being configured. When PF is being configured RX rings can be freed or rebuilt. If at the same time timestamps are updated, the kernel will crash by dereferencing null RX ring pointer. PID: 1449 TASK: ff187d28ed658040 CPU: 34 COMMAND: "ice-ptp-0000:51" #0 [ff1966a94a713bb0] machine_kexec at ffffffff9d05a0be #1 [ff1966a94a713c08] __crash_kexec at ffffffff9d192e9d #2 [ff1966a94a713cd0] crash_kexec at ffffffff9d1941bd #3 [ff1966a94a713ce8] oops_end at ffffffff9d01bd54 #4 [ff1966a94a713d08] no_context at ffffffff9d06bda4 #5 [ff1966a94a713d60] __bad_area_nosemaphore at ffffffff9d06c10c #6 [ff1966a94a713da8] do_page_fault at ffffffff9d06cae4 #7 [ff1966a94a713de0] page_fault at ffffffff9da0107e [exception RIP: ice_ptp_update_cached_phctime+91] RIP: ffffffffc076db8b RSP: ff1966a94a713e98 RFLAGS: 00010246 RAX: 16e3db9c6b7ccae4 RBX: ff187d269dd3c180 RCX: ff187d269cd4d018 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ff187d269cfcc644 R8: ff187d339b9641b0 R9: 0000000000000000 R10: 0000000000000002 R11: 0000000000000000 R12: ff187d269cfcc648 R13: ffffffff9f128784 R14: ffffffff9d101b70 R15: ff187d269cfcc640 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ff1966a94a713ea0] ice_ptp_periodic_work at ffffffffc076dbef [ice] #9 [ff1966a94a713ee0] kthread_worker_fn at ffffffff9d101c1b #10 [ff1966a94a713f10] kthread at ffffffff9d101b4d #11 [ff1966a94a713f50] ret_from_fork at ffffffff9da0023f Fixes: 77a781155a65 ("ice: enable receive hardware timestamping") Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Michal Schmidt Tested-by: Dave Cain Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_ptp.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ef26ff351b57e..9b50e9e6042a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -254,12 +254,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) * This function must be called periodically to ensure that the cached value * is never more than 2 seconds old. It must also be called whenever the PHC * time has been changed. + * + * Return: + * * 0 - OK, successfully updated + * * -EAGAIN - PF was busy, need to reschedule the update */ -static void ice_ptp_update_cached_phctime(struct ice_pf *pf) +static int ice_ptp_update_cached_phctime(struct ice_pf *pf) { u64 systime; int i; + if (test_and_set_bit(ICE_CFG_BUSY, pf->state)) + return -EAGAIN; + /* Read the current PHC time */ systime = ice_ptp_read_src_clk_reg(pf, NULL); @@ -282,6 +289,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf) WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); } } + clear_bit(ICE_CFG_BUSY, pf->state); + + return 0; } /** @@ -1418,17 +1428,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work) { struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work); struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); + int err; if (!test_bit(ICE_FLAG_PTP, pf->flags)) return; - ice_ptp_update_cached_phctime(pf); + err = ice_ptp_update_cached_phctime(pf); ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); - /* Run twice a second */ + /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, - msecs_to_jiffies(500)); + msecs_to_jiffies(err ? 10 : 500)); } /** -- GitLab From 737fbf0c32638e72fc1fb7e278a82f97957e323a Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 28 Apr 2022 14:11:42 -0700 Subject: [PATCH 0084/4335] ice: fix possible under reporting of ethtool Tx and Rx statistics [ Upstream commit 31b6298fd8e29effe9ed6b77351ac5969be56ce0 ] The hardware statistics counters are not cleared during resets so the drivers first access is to initialize the baseline and then subsequent reads are for reporting the counters. The statistics counters are read during the watchdog subtask when the interface is up. If the baseline is not initialized before the interface is up, then there can be a brief window in which some traffic can be transmitted/received before the initial baseline reading takes place. Directly initialize ethtool statistics in driver open so the baseline will be initialized when the interface is up, and any dropped packets incremented before the interface is up won't be reported. Fixes: 28dc1b86f8ea9 ("ice: ignore dropped packets during init") Signed-off-by: Paul Greenwalt Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 27b5c75ce3868..188abf36a5b23 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5656,9 +5656,10 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } - /* clear this now, and the first stats read will be used as baseline */ - vsi->stat_offsets_loaded = false; - + /* Perform an initial read of the statistics registers now to + * set the baseline so counters are ready when interface is up + */ + ice_update_eth_stats(vsi); ice_service_task_schedule(pf); return 0; -- GitLab From a2fd0fb3baaf2ddc9e9a2f4a606443adfb7ba172 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 13:59:57 +0200 Subject: [PATCH 0085/4335] ice: move ice_container_type onto ice_ring_container [ Upstream commit dc23715cf30a9acb808f5b08962877c390d3e6ea ] Currently ice_container_type is scoped only for ice_ethtool.c. Next commit that will split the ice_ring struct onto Rx/Tx specific ring structs is going to also modify the type of linked list of rings that is within ice_ring_container. Therefore, the functions that are taking the ice_ring_container as an input argument will need to be aware of a ring type that will be looked up. Embed ice_container_type within ice_ring_container and initialize it properly when allocating the q_vectors. Signed-off-by: Maciej Fijalkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_base.c | 2 ++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 38 ++++++++------------ drivers/net/ethernet/intel/ice/ice_txrx.h | 6 ++++ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index f74610442bda7..533a953f15acb 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -115,6 +115,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; q_vector->tx.itr_mode = ITR_DYNAMIC; q_vector->rx.itr_mode = ITR_DYNAMIC; + q_vector->tx.type = ICE_TX_CONTAINER; + q_vector->rx.type = ICE_RX_CONTAINER; if (vsi->type == ICE_VSI_VF) goto out; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 38c2d9a5574a0..19f115402969e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3466,15 +3466,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } -enum ice_container_type { - ICE_RX_CONTAINER, - ICE_TX_CONTAINER, -}; - /** * ice_get_rc_coalesce - get ITR values for specific ring container * @ec: ethtool structure to fill with driver's coalesce settings - * @c_type: container type, Rx or Tx * @rc: ring container that the ITR values will come from * * Query the device for ice_ring_container specific ITR values. This is @@ -3484,13 +3478,12 @@ enum ice_container_type { * Returns 0 on success, negative otherwise. */ static int -ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, - struct ice_ring_container *rc) +ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc) { if (!rc->ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); ec->rx_coalesce_usecs = rc->itr_setting; @@ -3501,7 +3494,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting; break; default: - dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", rc->type); return -EINVAL; } @@ -3522,18 +3515,18 @@ static int ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else { @@ -3585,7 +3578,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, /** * ice_set_rc_coalesce - set ITR values for specific ring container - * @c_type: container type, Rx or Tx * @ec: ethtool structure from user to update ITR settings * @rc: ring container that the ITR values will come from * @vsi: VSI associated to the ring container @@ -3597,10 +3589,10 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, * Returns 0 on success, negative otherwise. */ static int -ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, +ice_set_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc, struct ice_vsi *vsi) { - const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx"; + const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? "rx" : "tx"; u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; @@ -3608,7 +3600,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (!rc->ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && @@ -3641,7 +3633,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", - c_type); + rc->type); return -EINVAL; } @@ -3690,22 +3682,22 @@ static int ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 7c2328529ff8e..69f78a1c234f9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -332,6 +332,11 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring) return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); } +enum ice_container_type { + ICE_RX_CONTAINER, + ICE_TX_CONTAINER, +}; + struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; @@ -343,6 +348,7 @@ struct ice_ring_container { u16 itr_setting:13; u16 itr_reserved:2; u16 itr_mode:1; + enum ice_container_type type; }; struct ice_coalesce_stored { -- GitLab From 2b037a3bedf8cf210cd4007eb2fc1f2088df60d5 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Sun, 8 May 2022 19:33:48 -0400 Subject: [PATCH 0086/4335] ice: Fix interrupt moderation settings getting cleared [ Upstream commit bf13502ed5f941b0777b3fd1e24dac5d93f3886c ] Adaptive-rx and Adaptive-tx are interrupt moderation settings that can be enabled/disabled using ethtool: ethtool -C ethX adaptive-rx on/off adaptive-tx on/off Unfortunately those settings are getting cleared after changing number of queues, or in ethtool world 'channels': ethtool -L ethX rx 1 tx 1 Clearing was happening due to introduction of bit fields in ice_ring_container struct. This way only itr_setting bits were rebuilt during ice_vsi_rebuild_set_coalesce(). Introduce an anonymous struct of bitfields and create a union to refer to them as a single variable. This way variable can be easily saved and restored. Fixes: 61dc79ced7aa ("ice: Restore interrupt throttle settings after VSI rebuild") Signed-off-by: Michal Wilczynski Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_lib.c | 16 ++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.h | 11 ++++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 653996e8fd309..4417238b0e64f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2980,8 +2980,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; - coalesce[i].itr_tx = q_vector->tx.itr_setting; - coalesce[i].itr_rx = q_vector->rx.itr_setting; + coalesce[i].itr_tx = q_vector->tx.itr_settings; + coalesce[i].itr_rx = q_vector->rx.itr_settings; coalesce[i].intrl = q_vector->intrl; if (i < vsi->num_txq) @@ -3037,21 +3037,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, */ if (i < vsi->alloc_rxq && coalesce[i].rx_valid) { rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[i].itr_rx; + rc->itr_settings = coalesce[i].itr_rx; ice_write_itr(rc, rc->itr_setting); } else if (i < vsi->alloc_rxq) { rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[0].itr_rx; + rc->itr_settings = coalesce[0].itr_rx; ice_write_itr(rc, rc->itr_setting); } if (i < vsi->alloc_txq && coalesce[i].tx_valid) { rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[i].itr_tx; + rc->itr_settings = coalesce[i].itr_tx; ice_write_itr(rc, rc->itr_setting); } else if (i < vsi->alloc_txq) { rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[0].itr_tx; + rc->itr_settings = coalesce[0].itr_tx; ice_write_itr(rc, rc->itr_setting); } @@ -3065,12 +3065,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, for (; i < vsi->num_q_vectors; i++) { /* transmit */ rc = &vsi->q_vectors[i]->tx; - rc->itr_setting = coalesce[0].itr_tx; + rc->itr_settings = coalesce[0].itr_tx; ice_write_itr(rc, rc->itr_setting); /* receive */ rc = &vsi->q_vectors[i]->rx; - rc->itr_setting = coalesce[0].itr_rx; + rc->itr_settings = coalesce[0].itr_rx; ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 69f78a1c234f9..4adc3dff04ba7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -345,9 +345,14 @@ struct ice_ring_container { /* this matches the maximum number of ITR bits, but in usec * values, so it is shifted left one bit (bit zero is ignored) */ - u16 itr_setting:13; - u16 itr_reserved:2; - u16 itr_mode:1; + union { + struct { + u16 itr_setting:13; + u16 itr_reserved:2; + u16 itr_mode:1; + }; + u16 itr_settings; + }; enum ice_container_type type; }; -- GitLab From 212ca7bb55d3d19ac2e9ffe798f486590c82bcbb Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 13 Apr 2022 10:13:18 +0300 Subject: [PATCH 0087/4335] clk: at91: generated: consider range when calculating best rate [ Upstream commit d0031e6fbed955ff8d5f5bbc8fe7382482559cec ] clk_generated_best_diff() helps in finding the parent and the divisor to compute a rate closest to the required one. However, it doesn't take into account the request's range for the new rate. Make sure the new rate is within the required range. Fixes: 8a8f4bf0c480 ("clk: at91: clk-generated: create function to find best_diff") Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220413071318.244912-1-codrin.ciubotariu@microchip.com Reviewed-by: Claudiu Beznea Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/at91/clk-generated.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index b656d25a97678..fe772baeb15ff 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -106,6 +106,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate; else tmp_rate = parent_rate / div; + + if (tmp_rate < req->min_rate || tmp_rate > req->max_rate) + return; + tmp_diff = abs(req->rate - tmp_rate); if (*best_diff < 0 || *best_diff >= tmp_diff) { -- GitLab From 1b0c87de277ea2fc5635893502c81bdcaf1d3181 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 May 2022 20:07:02 +0200 Subject: [PATCH 0088/4335] net/qla3xxx: Fix a test in ql_reset_work() [ Upstream commit 5361448e45fac6fb96738df748229432a62d78b6 ] test_bit() tests if one bit is set or not. Here the logic seems to check of bit QL_RESET_PER_SCSI (i.e. 4) OR bit QL_RESET_START (i.e. 3) is set. In fact, it checks if bit 7 (4 | 3 = 7) is set, that is to say QL_ADAPTER_UP. This looks harmless, because this bit is likely be set, and when the ql_reset_work() delayed work is scheduled in ql3xxx_isr() (the only place that schedule this work), QL_RESET_START or QL_RESET_PER_SCSI is set. This has been spotted by smatch. Fixes: 5a4faa873782 ("[PATCH] qla3xxx NIC driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/80e73e33f390001d9c0140ffa9baddf6466a41a2.1652637337.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qla3xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 4eb9ea280474f..40d14d80f6f1f 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3612,7 +3612,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; - if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) { + if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) || + test_bit(QL_RESET_START, &qdev->flags)) { clear_bit(QL_LINK_MASTER, &qdev->flags); /* -- GitLab From 2372405955f9c9c12c35885720553c34ff79931f Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 17 May 2022 09:25:30 +0800 Subject: [PATCH 0089/4335] NFC: nci: fix sleep in atomic context bugs caused by nci_skb_alloc [ Upstream commit 23dd4581350d4ffa23d58976ec46408f8f4c1e16 ] There are sleep in atomic context bugs when the request to secure element of st-nci is timeout. The root cause is that nci_skb_alloc with GFP_KERNEL parameter is called in st_nci_se_wt_timeout which is a timer handler. The call paths that could trigger bugs are shown below: (interrupt context 1) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_skb_alloc(..., GFP_KERNEL) //may sleep (interrupt context 2) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_send_data nci_queue_tx_data_frags nci_skb_alloc(..., GFP_KERNEL) //may sleep This patch changes allocation mode of nci_skb_alloc from GFP_KERNEL to GFP_ATOMIC in order to prevent atomic context sleeping. The GFP_ATOMIC flag makes memory allocation operation could be used in atomic context. Fixes: ed06aeefdac3 ("nfc: st-nci: Rename st21nfcb to st-nci") Signed-off-by: Duoming Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220517012530.75714-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/nfc/nci/data.c | 2 +- net/nfc/nci/hci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 6055dc9a82aa0..aa5e712adf078 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index e199912ee1e59..85b808fdcbc3a 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; -- GitLab From 9cec84ed7ca7dcbe405e9ef5904298e7d210f127 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 21 Mar 2022 10:07:44 +0200 Subject: [PATCH 0090/4335] net/mlx5: DR, Fix missing flow_source when creating multi-destination FW table [ Upstream commit 2c5fc6cd269ad3476da99dad02521d2af4a8e906 ] In order to support multiple destination FTEs with SW steering FW table is created with single FTE with multiple actions and SW steering rule forward to it. When creating this table, flow source isn't set according to the original FTE. Fix this by passing the original FTE flow source to the created FW table. Fixes: 34583beea4b7 ("net/mlx5: DR, Create multi-destination table for SW-steering use") Signed-off-by: Maor Dickman Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 3 ++- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index a5b9f65db23c6..897c7f8521238 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -846,7 +846,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; struct mlx5dr_action **ref_actions; @@ -914,7 +915,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, reformat_req, &action->dest_tbl->fw_tbl.id, &action->dest_tbl->fw_tbl.group_id, - ignore_flow_level); + ignore_flow_level, + flow_source); if (ret) goto free_action; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 0d6f86eb248b9..c74083de1801b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; struct mlx5dr_cmd_fte_info fte_info = {}; @@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, fte_info.val = val; fte_info.dest_arr = dest; fte_info.ignore_flow_level = ignore_flow_level; + fte_info.flow_context.flow_source = flow_source; ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3d4e035698dd3..bc206836af6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1394,7 +1394,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, u32 group_id); #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 7e58f4e594b74..ae4597118f8b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -492,11 +492,13 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } else if (num_term_actions > 1) { bool ignore_flow_level = !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->flow_context.flow_source; tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, term_actions, num_term_actions, - ignore_flow_level); + ignore_flow_level, + flow_source); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 5ef1995434794..7806e5c05b677 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -96,7 +96,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); struct mlx5dr_action *mlx5dr_action_create_drop(void); -- GitLab From 4048778318167e535eb9c012dabbc986fe8452e3 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Apr 2022 18:37:03 +0300 Subject: [PATCH 0091/4335] net/mlx5e: Properly block LRO when XDP is enabled [ Upstream commit cf6e34c8c22fba66bd21244b95ea47e235f68974 ] LRO is incompatible and mutually exclusive with XDP. However, the needed checks are only made when enabling XDP. If LRO is enabled when XDP is already active, the command will succeed, and XDP will be skipped in the data path, although still enabled. This commit fixes the bug by checking the XDP status in mlx5e_fix_features and disabling LRO if XDP is enabled. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 01301bee420cf..7efb898e9f96f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3542,6 +3542,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) -- GitLab From ad54e63b832d70df47b2b7910e40373429c9dac3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 17 May 2022 17:42:31 +0800 Subject: [PATCH 0092/4335] net: af_key: add check for pfkey_broadcast in function pfkey_process [ Upstream commit 4dc2a5a8f6754492180741facf2a8787f2c415d7 ] If skb_clone() returns null pointer, pfkey_broadcast() will return error. Therefore, it should be better to check the return value of pfkey_broadcast() and return error if fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jiasheng Jiang Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index fd51db3be91c4..92e9d75dba2f4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + if (err) + return err; memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); -- GitLab From b88e30dcdf64fc18802fed0d2576334fb5b7e269 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:44:51 +0100 Subject: [PATCH 0093/4335] ARM: 9196/1: spectre-bhb: enable for Cortex-A15 [ Upstream commit 0dc14aa94ccd8ba35eb17a0f9b123d1566efd39e ] The Spectre-BHB mitigations were inadvertently left disabled for Cortex-A15, due to the fact that cpu_v7_bugs_init() is not called in that case. So fix that. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/proc-v7-bugs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 06dbfb968182d..fb9f3eb6bf483 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) -- GitLab From 37bb8ea1542b70a0716484b23c5a46edc75f34b5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:46:17 +0100 Subject: [PATCH 0094/4335] ARM: 9197/1: spectre-bhb: fix loop8 sequence for Thumb2 [ Upstream commit 3cfb3019979666bdf33a1010147363cf05e0f17b ] In Thumb2, 'b . + 4' produces a branch instruction that uses a narrow encoding, and so it does not jump to the following instruction as expected. So use W(b) instead. Fixes: 6c7cb60bff7a ("ARM: fix Thumb2 regression with Spectre BHB") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 46b697dfa4cfc..68261a83b7ad8 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1038,7 +1038,7 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -3: b . + 4 +3: W(b) . + 4 subs r0, r0, #1 bne 3b dsb -- GitLab From 6e66d31618a991a345ef9a1e02f8bc22e23dcfce Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Jan 2022 11:25:23 -0800 Subject: [PATCH 0095/4335] mptcp: change the parameter of __mptcp_make_csum [ Upstream commit c312ee219100e86143a1d3cc10b367bc43a0e0b8 ] This patch changed the type of the last parameter of __mptcp_make_csum() from __sum16 to __wsum. And export this function in protocol.h. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mptcp/options.c | 8 ++++---- net/mptcp/protocol.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e515ba9ccb5d8..d158f53d3bc3e 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1214,7 +1214,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum) +u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; @@ -1229,14 +1229,14 @@ static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum1 header.data_len = htons(data_len); header.csum = 0; - csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum)); + csum = csum_partial(&header, sizeof(header), sum); return (__force u16)csum_fold(csum); } static u16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, - mpext->csum); + ~csum_unfold(mpext->csum)); } void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, @@ -1368,7 +1368,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, __mptcp_make_csum(opts->data_seq, opts->subflow_seq, opts->data_len, - opts->csum), ptr); + ~csum_unfold(opts->csum)), ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 82c5dc4d6b49d..6bcdaf01f483b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -718,6 +718,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); +u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); -- GitLab From f67eeb03fea6178fd0792bf61e2f1cc94eb2dc4b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Jan 2022 11:25:24 -0800 Subject: [PATCH 0096/4335] mptcp: reuse __mptcp_make_csum in validate_data_csum [ Upstream commit 8401e87f5a36d370cbf1e9d4ba602a553ce9324a ] This patch reused __mptcp_make_csum() in validate_data_csum() instead of open-coding. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mptcp/subflow.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6172f380dfb76..04afead7316f8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -845,9 +845,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * bool csum_reqd) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct csum_pseudo_header header; u32 offset, seq, delta; - __wsum csum; + u16 csum; int len; if (!csum_reqd) @@ -908,13 +907,11 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * * while the pseudo header requires the original DSS data len, * including that */ - header.data_seq = cpu_to_be64(subflow->map_seq); - header.subflow_seq = htonl(subflow->map_subflow_seq); - header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); - header.csum = 0; - - csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); - if (unlikely(csum_fold(csum))) { + csum = __mptcp_make_csum(subflow->map_seq, + subflow->map_subflow_seq, + subflow->map_data_len + subflow->map_data_fin, + subflow->map_data_csum); + if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); subflow->send_mp_fail = 1; MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); -- GitLab From bf6800a394949a8093a823e0707cebc5ab7475f1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 17 May 2022 11:02:11 -0700 Subject: [PATCH 0097/4335] mptcp: fix checksum byte order [ Upstream commit ba2c89e0ea74a904d5231643245753d77422e7f5 ] The MPTCP code typecasts the checksum value to u16 and then converts it to big endian while storing the value into the MPTCP option. As a result, the wire encoding for little endian host is wrong, and that causes interoperabilty interoperability issues with other implementation or host with different endianness. Address the issue writing in the packet the unmodified __sum16 value. MPTCP checksum is disabled by default, interoperating with systems with bad mptcp-level csum encoding should cause fallback to TCP. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/275 Fixes: c5b39e26d003 ("mptcp: send out checksum for DSS") Fixes: 390b95a5fb84 ("mptcp: receive checksum for DSS") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/mptcp/options.c | 36 ++++++++++++++++++++++++------------ net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d158f53d3bc3e..193f0fcce8d85 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } @@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; - mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum = get_unaligned((__force __sum16 *)ptr); ptr += 2; } @@ -1214,7 +1214,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; @@ -1230,15 +1230,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) header.csum = 0; csum = csum_partial(&header, sizeof(header), sum); - return (__force u16)csum_fold(csum); + return csum_fold(csum); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, ~csum_unfold(mpext->csum)); } +static void put_len_csum(u16 len, __sum16 csum, void *data) +{ + __sum16 *sumptr = data + 2; + __be16 *ptr = data; + + put_unaligned_be16(len, ptr); + + put_unaligned(csum, sumptr); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1315,8 +1325,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; if (opts->csum_reqd) { - put_unaligned_be32(mpext->data_len << 16 | - mptcp_make_csum(mpext), ptr); + put_len_csum(mpext->data_len, + mptcp_make_csum(mpext), + ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); @@ -1364,11 +1375,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->data_len << 16 | - __mptcp_make_csum(opts->data_seq, - opts->subflow_seq, - opts->data_len, - ~csum_unfold(opts->csum)), ptr); + put_len_csum(opts->data_len, + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + ~csum_unfold(opts->csum)), + ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6bcdaf01f483b..72a259a74b575 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -718,7 +718,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); -u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); +__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 04afead7316f8..9c7deffe7cb6e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -846,7 +846,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); u32 offset, seq, delta; - u16 csum; + __sum16 csum; int len; if (!csum_reqd) -- GitLab From e723f67cf0c716c64f8844c011fca6fee7cacf8b Mon Sep 17 00:00:00 2001 From: Kevin Mitchell Date: Tue, 17 May 2022 11:01:05 -0700 Subject: [PATCH 0098/4335] igb: skip phy status check where unavailable [ Upstream commit 942d2ad5d2e0df758a645ddfadffde2795322728 ] igb_read_phy_reg() will silently return, leaving phy_data untouched, if hw->ops.read_reg isn't set. Depending on the uninitialized value of phy_data, this led to the phy status check either succeeding immediately or looping continuously for 2 seconds before emitting a noisy err-level timeout. This message went out to the console even though there was no actual problem. Instead, first check if there is read_reg function pointer. If not, proceed without trying to check the phy status register. Fixes: b72f3f72005d ("igb: When GbE link up, wait for Remote receiver status condition") Signed-off-by: Kevin Mitchell Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bf8ef81f6c0e2..b883033514843 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5505,7 +5505,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ -- GitLab From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 May 2022 10:44:14 +0200 Subject: [PATCH 0099/4335] netfilter: flowtable: fix TCP flow teardown [ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ] This patch addresses three possible problems: 1. ct gc may race to undo the timeout adjustment of the packet path, leaving the conntrack entry in place with the internal offload timeout (one day). 2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE timeout is reached before the flow offload del. 3. tcp ct is always set to ESTABLISHED with a very long timeout in flow offload teardown/delete even though the state might be already CLOSED. Also as a remark we cannot assume that the FIN or RST packet is hitting flow table teardown as the packet might get bumped to the slow path in nftables. This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN state transition. Moreover, teturn the connection's ownership to conntrack upon teardown by clearing the offload flag and fixing the established timeout value. The flow table GC thread will asynchonrnously free the flow table and hardware offload entries. Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows on which is also misleading since the flow is back to classic conntrack path. If nf_ct_delete() removes the entry from the conntrack table, then it calls nf_ct_put() which decrements the refcnt. This is not a problem because the flowtable holds a reference to the conntrack object from flow_offload_alloc() path which is released via flow_offload_free(). This patch also updates nft_flow_offload to skip packets in SYN_RECV state. Since we might miss or bump packets to slow path, we do not know what will happen there while we are still in SYN_RECV, this patch postpones offload up to the next packet which also aligns to the existing behaviour in tc-ct. flow_offload_teardown() does not reset the existing tcp state from flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bump to slow path might have already update the state to CLOSE/FIN. Joint work with Oz and Sven. Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race") Signed-off-by: Oz Shlomo Signed-off-by: Sven Auhagen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 33 +++++++----------------------- net/netfilter/nft_flow_offload.c | 3 ++- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 52e7f94d2450d..58f3f77b3eb2f 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init); static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) { - tcp->state = TCP_CONNTRACK_ESTABLISHED; tcp->seen[0].td_maxwin = 0; tcp->seen[1].td_maxwin = 0; } -static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); @@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + flow_offload_fixup_tcp(&ct->proto.tcp); + + timeout = tn->timeouts[ct->proto.tcp.state]; timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); @@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } -static void flow_offload_fixup_ct_state(struct nf_conn *ct) -{ - if (nf_ct_protonum(ct) == IPPROTO_TCP) - flow_offload_fixup_tcp(&ct->proto.tcp); -} - -static void flow_offload_fixup_ct(struct nf_conn *ct) -{ - flow_offload_fixup_ct_state(ct); - flow_offload_fixup_ct_timeout(ct); -} - static void flow_offload_route_release(struct flow_offload *flow) { nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); @@ -355,22 +344,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table, rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, nf_flow_offload_rhash_params); - - clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - - if (nf_flow_has_expired(flow)) - flow_offload_fixup_ct(flow->ct); - else - flow_offload_fixup_ct_timeout(flow->ct); - flow_offload_free(flow); } void flow_offload_teardown(struct flow_offload *flow) { + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); set_bit(NF_FLOW_TEARDOWN, &flow->flags); - - flow_offload_fixup_ct_state(flow->ct); + flow_offload_fixup_ct(flow->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -460,7 +441,7 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_has_stale_dst(flow)) - set_bit(NF_FLOW_TEARDOWN, &flow->flags); + flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 12145a80ef035..aac6db8680d47 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -298,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, case IPPROTO_TCP: tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(_tcph), &_tcph); - if (unlikely(!tcph || tcph->fin || tcph->rst)) + if (unlikely(!tcph || tcph->fin || tcph->rst || + !nf_conntrack_tcp_established(ct))) goto out; break; case IPPROTO_UDP: -- GitLab From c1e170b11276326bbf566bb3e79143452aeb9f65 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Mar 2022 13:11:24 +0100 Subject: [PATCH 0100/4335] netfilter: flowtable: pass flowtable to nf_flow_table_iterate() [ Upstream commit 217cff36e885627c41a14e803fc44f9cbc945767 ] The flowtable object is already passed as argument to nf_flow_table_iterate(), do use not data pointer to pass flowtable. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 58f3f77b3eb2f..de783c9094d73 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -382,7 +382,8 @@ EXPORT_SYMBOL_GPL(flow_offload_lookup); static int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), + void (*iter)(struct nf_flowtable *flowtable, + struct flow_offload *flow, void *data), void *data) { struct flow_offload_tuple_rhash *tuplehash; @@ -406,7 +407,7 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - iter(flow, data); + iter(flow_table, flow, data); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); @@ -434,10 +435,9 @@ static bool nf_flow_has_stale_dst(struct flow_offload *flow) flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); } -static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) +static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data) { - struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_has_stale_dst(flow)) @@ -462,7 +462,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work) struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -578,7 +578,8 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) } EXPORT_SYMBOL_GPL(nf_flow_table_init); -static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) +static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data) { struct net_device *dev = data; @@ -620,11 +621,10 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); nf_flow_table_offload_flush(flow_table); if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, - flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); -- GitLab From 88b937673b3552d54da20f648e61a123f4c1fa67 Mon Sep 17 00:00:00 2001 From: Ritaro Takenaka Date: Tue, 17 May 2022 12:55:30 +0200 Subject: [PATCH 0101/4335] netfilter: flowtable: move dst_check to packet path [ Upstream commit 2738d9d963bd1f06d5114c2b4fa5771a95703991 ] Fixes sporadic IPv6 packet loss when flow offloading is enabled. IPv6 route GC and flowtable GC are not synchronized. When dst_cache becomes stale and a packet passes through the flow before the flowtable GC teardowns it, the packet can be dropped. So, it is necessary to check dst every time in packet path. Fixes: 227e1e4d0d6c ("netfilter: nf_flowtable: skip device lookup from interface index") Signed-off-by: Ritaro Takenaka Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 23 +---------------------- net/netfilter/nf_flow_table_ip.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index de783c9094d73..9fb407084c506 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -415,32 +415,11 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple) -{ - struct dst_entry *dst; - - if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || - tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { - dst = tuple->dst_cache; - if (!dst_check(dst, tuple->dst_cookie)) - return true; - } - - return false; -} - -static bool nf_flow_has_stale_dst(struct flow_offload *flow) -{ - return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) || - flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); -} - static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct) || - nf_flow_has_stale_dst(flow)) + nf_ct_is_dying(flow->ct)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 6257d87c3a56d..28026467b54cd 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -227,6 +227,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } +static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple) +{ + if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH && + tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM) + return true; + + return dst_check(tuple->dst_cache, tuple->dst_cookie); +} + static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, const struct nf_hook_state *state, struct dst_entry *dst) @@ -346,6 +355,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; @@ -582,6 +596,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) return NF_ACCEPT; + if (!nf_flow_dst_check(&tuplehash->tuple)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; -- GitLab From 11ad6bab26c826b2fc798276c54109eb0b12e450 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 May 2022 02:58:40 +0200 Subject: [PATCH 0102/4335] net: bridge: Clear offload_fwd_mark when passing frame up bridge interface. [ Upstream commit fbb3abdf2223cd0dfc07de85fe5a43ba7f435bdf ] It is possible to stack bridges on top of each other. Consider the following which makes use of an Ethernet switch: br1 / \ / \ / \ br0.11 wlan0 | br0 / | \ p1 p2 p3 br0 is offloaded to the switch. Above br0 is a vlan interface, for vlan 11. This vlan interface is then a slave of br1. br1 also has a wireless interface as a slave. This setup trunks wireless lan traffic over the copper network inside a VLAN. A frame received on p1 which is passed up to the bridge has the skb->offload_fwd_mark flag set to true, indicating that the switch has dealt with forwarding the frame out ports p2 and p3 as needed. This flag instructs the software bridge it does not need to pass the frame back down again. However, the flag is not getting reset when the frame is passed upwards. As a result br1 sees the flag, wrongly interprets it, and fails to forward the frame to wlan0. When passing a frame upwards, clear the flag. This is the Rx equivalent of br_switchdev_frame_unmark() in br_dev_xmit(). Fixes: f1c2eddf4cb6 ("bridge: switchdev: Use an helper to clear forward mark") Signed-off-by: Andrew Lunn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20220518005840.771575-1-andrew@lunn.ch Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/bridge/br_input.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index b50382f957c12..6743c8a0fe8e1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb) dev_sw_netstats_rx_add(brdev, skb->len); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc mode when someone * may be running packet capture. -- GitLab From d0116a3f25e29af68b323035030b2fa2a7ee74ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 21:38:56 +0200 Subject: [PATCH 0103/4335] riscv: dts: sifive: fu540-c000: align dma node name with dtschema [ Upstream commit b17410182b6f98191fbf7f42d3b4a78512769d29 ] Fixes dtbs_check warnings like: dma@3000000: $nodename:0: 'dma@3000000' does not match '^dma-controller(@.*)?$' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220407193856.18223-1-krzysztof.kozlowski@linaro.org Fixes: c5ab54e9945b ("riscv: dts: add support for PDMA device of HiFive Unleashed Rev A00") Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 7db8610534834..64c06c9b41dc8 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -166,7 +166,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; status = "disabled"; }; - dma: dma@3000000 { + dma: dma-controller@3000000 { compatible = "sifive,fu540-c000-pdma"; reg = <0x0 0x3000000 0x0 0x8000>; interrupt-parent = <&plic0>; -- GitLab From e80793223252629091ec08b3f8c3e8f162b6507f Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Thu, 19 May 2022 15:05:29 +0900 Subject: [PATCH 0104/4335] scsi: ufs: core: Fix referencing invalid rsp field [ Upstream commit d5d92b64408443e113b9742f8f1c35278910dd4d ] Fix referencing sense data when it is invalid. When the length of the data segment is 0, there is no valid information in the rsp field, so ufshpb_rsp_upiu() is returned without additional operation. Link: https://lore.kernel.org/r/252651381.41652940482659.JavaMail.epsvc@epcpadp4 Fixes: 4b5f49079c52 ("scsi: ufs: ufshpb: L2P map management for HPB read") Acked-by: Avri Altman Signed-off-by: Daejun Park Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshpb.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index f7eaf64293a4b..14300896c57fe 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -1257,6 +1257,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr; int data_seg_len; + data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2) + & MASK_RSP_UPIU_DATA_SEG_LEN; + + /* If data segment length is zero, rsp_field is not valid */ + if (!data_seg_len) + return; + if (unlikely(lrbp->lun != rsp_field->lun)) { struct scsi_device *sdev; bool found = false; @@ -1291,18 +1298,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return; } - data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2) - & MASK_RSP_UPIU_DATA_SEG_LEN; - - /* To flush remained rsp_list, we queue the map_work task */ - if (!data_seg_len) { - if (!ufshpb_is_general_lun(hpb->lun)) - return; - - ufshpb_kick_map_work(hpb); - return; - } - BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE); if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field)) -- GitLab From 86eac8d7610100b556db7949e24c3cfdb664da04 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 May 2022 21:25:12 -0300 Subject: [PATCH 0105/4335] perf build: Fix check for btf__load_from_kernel_by_id() in libbpf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0ae065a5d265bc5ada13e350015458e0c5e5c351 ] Avi Kivity reported a problem where the __weak btf__load_from_kernel_by_id() in tools/perf/util/bpf-event.c was being used and it called btf__get_from_id() in tools/lib/bpf/btf.c that in turn called back to btf__load_from_kernel_by_id(), resulting in an endless loop. Fix this by adding a feature test to check if btf__load_from_kernel_by_id() is available when building perf with LIBBPF_DYNAMIC=1, and if not then provide the fallback to the old btf__get_from_id(), that doesn't call back to btf__load_from_kernel_by_id() since at that time it didn't exist at all. Tested on Fedora 35 where we have libbpf-devel 0.4.0 with LIBBPF_DYNAMIC where we don't have btf__load_from_kernel_by_id() and thus its feature test fail, not defining HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID: $ cat /tmp/build/perf-urgent/feature/test-libbpf-btf__load_from_kernel_by_id.make.output test-libbpf-btf__load_from_kernel_by_id.c: In function ‘main’: test-libbpf-btf__load_from_kernel_by_id.c:6:16: error: implicit declaration of function ‘btf__load_from_kernel_by_id’ [-Werror=implicit-function-declaration] 6 | return btf__load_from_kernel_by_id(20151128, NULL); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors $ $ nm /tmp/build/perf-urgent/perf | grep btf__load_from_kernel_by_id 00000000005ba180 T btf__load_from_kernel_by_id $ $ objdump --disassemble=btf__load_from_kernel_by_id -S /tmp/build/perf-urgent/perf /tmp/build/perf-urgent/perf: file format elf64-x86-64 00000000005ba180 : #include "record.h" #include "util/synthetic-events.h" #ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID struct btf *btf__load_from_kernel_by_id(__u32 id) { 5ba180: 55 push %rbp 5ba181: 48 89 e5 mov %rsp,%rbp 5ba184: 48 83 ec 10 sub $0x10,%rsp 5ba188: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 5ba18f: 00 00 5ba191: 48 89 45 f8 mov %rax,-0x8(%rbp) 5ba195: 31 c0 xor %eax,%eax struct btf *btf; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" int err = btf__get_from_id(id, &btf); 5ba197: 48 8d 75 f0 lea -0x10(%rbp),%rsi 5ba19b: e8 a0 57 e5 ff call 40f940 5ba1a0: 89 c2 mov %eax,%edx #pragma GCC diagnostic pop return err ? ERR_PTR(err) : btf; 5ba1a2: 48 98 cltq 5ba1a4: 85 d2 test %edx,%edx 5ba1a6: 48 0f 44 45 f0 cmove -0x10(%rbp),%rax } Fixes: 218e7b775d368f38 ("perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions") Reported-by: Avi Kivity Link: https://lore.kernel.org/linux-perf-users/f0add43b-3de5-20c5-22c4-70aff4af959f@scylladb.com Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/linux-perf-users/YobjjFOblY4Xvwo7@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ .../feature/test-libbpf-btf__load_from_kernel_by_id.c | 7 +++++++ tools/perf/Makefile.config | 7 +++++++ tools/perf/util/bpf-event.c | 4 +++- 5 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 88dd7db55d385..6abde487bba19 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -97,6 +97,7 @@ FEATURE_TESTS_EXTRA := \ llvm-version \ clang \ libbpf \ + libbpf-btf__load_from_kernel_by_id \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0e6d685b86177..69a43d9ea331f 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -56,6 +56,7 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-libbpf.bin \ + test-libbpf-btf__load_from_kernel_by_id.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -283,6 +284,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-libbpf.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c new file mode 100644 index 0000000000000..f7c084428735a --- /dev/null +++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + return btf__load_from_kernel_by_id(20151128, NULL); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a92f0f025ec75..e0660bc76b7b4 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -548,9 +548,16 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf $(call detected,CONFIG_LIBBPF_DYNAMIC) + + $(call feature_check,libbpf-btf__load_from_kernel_by_id) + ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif + else + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID endif endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 16ad0e6e9e9c5..cf1b9f6ec0dbe 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -21,7 +21,8 @@ #include "record.h" #include "util/synthetic-events.h" -struct btf * __weak btf__load_from_kernel_by_id(__u32 id) +#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID +struct btf *btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; #pragma GCC diagnostic push @@ -31,6 +32,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? ERR_PTR(err) : btf; } +#endif #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) -- GitLab From e7c6ac3cc2b91a58e883995922d4a9cab28a8dc8 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 11 May 2022 10:15:04 +0800 Subject: [PATCH 0106/4335] gpio: gpio-vf610: do not touch other bits when set the target bit [ Upstream commit 9bf3ac466faa83d51a8fe9212131701e58fdef74 ] For gpio controller contain register PDDR, when set one target bit, current logic will clear all other bits, this is wrong. Use operator '|=' to fix it. Fixes: 659d8a62311f ("gpio: vf610: add imx7ulp support") Reviewed-by: Peng Fan Signed-off-by: Haibo Chen Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-vf610.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index e0f2b67558e74..47e191e11c696 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, { struct vf610_gpio_port *port = gpiochip_get_data(chip); unsigned long mask = BIT(gpio); + u32 val; - if (port->sdata && port->sdata->have_paddr) - vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR); + if (port->sdata && port->sdata->have_paddr) { + val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); + val |= mask; + vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); + } vf610_gpio_set(chip, gpio, value); -- GitLab From d883b2e9afb1492d13cfff05c4894e1c3c240d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 11 May 2022 09:58:56 +0200 Subject: [PATCH 0107/4335] gpio: mvebu/pwm: Refuse requests with inverted polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3ecb10175b1f776f076553c24e2689e42953fef5 ] The driver doesn't take struct pwm_state::polarity into account when configuring the hardware, so refuse requests for inverted polarity. Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mvebu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index ad8822da7c27a..1448dc874dfc3 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long flags; unsigned int on, off; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle; do_div(val, NSEC_PER_SEC); if (val > UINT_MAX + 1ULL) -- GitLab From dff5463bc750792cbbde2a2ca8210f0c2575a70b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 18 May 2022 07:51:25 -0700 Subject: [PATCH 0108/4335] perf regs x86: Fix arch__intr_reg_mask() for the hybrid platform [ Upstream commit 01b28e4a58152e8906eeb5f1b55a0c404c48c7c8 ] The X86 specific arch__intr_reg_mask() is to check whether the kernel and hardware can collect XMM registers. But it doesn't work on some hybrid platform. Without the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 The config of the test event doesn't contain the PMU information. The kernel may fail to initialize it on the correct hybrid PMU and return the wrong non-supported information. Add the PMU information into the config for the hybrid platform. The same register set is supported among different hybrid PMUs. Checking the first available one is good enough. With the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 Fixes: 6466ec14aaf44ff1 ("perf regs x86: Add X86 specific arch__intr_reg_mask()") Reported-by: Ammy Yi Signed-off-by: Kan Liang Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220518145125.1494156-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/arch/x86/util/perf_regs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 207c56805c551..0ed177991ad05 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -9,6 +9,8 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" #include "../../../util/event.h" +#include "../../../util/pmu.h" +#include "../../../util/pmu-hybrid.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void) .disabled = 1, .exclude_kernel = 1, }; + struct perf_pmu *pmu; int fd; /* * In an unnamed union, init it here to build on older gcc versions */ attr.sample_period = 1; + if (perf_pmu__has_hybrid()) { + /* + * The same register set is supported among different hybrid PMUs. + * Only check the first available one. + */ + pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list); + attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } + event_attr_init(&attr); fd = sys_perf_event_open(&attr, 0, -1, -1, 0); -- GitLab From 0b56244bda16e7121d10c28dd035e3a7e091875e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 20 May 2022 10:11:58 +0200 Subject: [PATCH 0109/4335] perf bench numa: Address compiler error on s390 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f8ac1c478424a9a14669b8cef7389b1e14e5229d ] The compilation on s390 results in this error: # make DEBUG=y bench/numa.o ... bench/numa.c: In function ‘__bench_numa’: bench/numa.c:1749:81: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size between 10 and 20 [-Werror=format-truncation=] 1749 | snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); ^~ ... bench/numa.c:1749:64: note: directive argument in the range [-2147483647, 2147483646] ... # The maximum length of the %d replacement is 11 characters because of the negative sign. Therefore extend the array by two more characters. Output after: # make DEBUG=y bench/numa.o > /dev/null 2>&1; ll bench/numa.o -rw-r--r-- 1 root root 418320 May 19 09:11 bench/numa.o # Fixes: 3aff8ba0a4c9c919 ("perf bench numa: Avoid possible truncation when using snprintf()") Suggested-by: Namhyung Kim Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20220520081158.2990006-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f2640179ada9e..c2c81567afa50 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1672,7 +1672,7 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { -- GitLab From d711a211844a00b9fc2dd51db25642ab327c63f8 Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Mon, 2 May 2022 08:09:17 -0700 Subject: [PATCH 0110/4335] scsi: scsi_dh_alua: Properly handle the ALUA transitioning state [ Upstream commit 6056a92ceb2a7705d61df7ec5370548e96aee258 ] The handling of the ALUA transitioning state is currently broken. When a target goes into this state, it is expected that the target is allowed to stay in this state for the implicit transition timeout without a path failure. The handler has this logic, but it gets skipped currently. When the target transitions, there is in-flight I/O from the initiator. The first of these responses from the target will be a unit attention letting the initiator know that the ALUA state has changed. The remaining in-flight I/Os, before the initiator finds out that the portal state has changed, will return not ready, ALUA state is transitioning. The portal state will change to SCSI_ACCESS_STATE_TRANSITIONING. This will lead to all new I/O immediately failing the path unexpectedly. The path failure happens in less than a second instead of the expected successes until the transition timer is exceeded. Allow I/Os to continue while the path is in the ALUA transitioning state. The handler already takes care of a target that stays in the transitioning state for too long by changing the state to ALUA state standby once the transition timeout is exceeded at which point the path will fail. Link: https://lore.kernel.org/r/CAHZQxy+4sTPz9+pY3=7VJH+CLUJsDct81KtnR2be8ycN5mhqTg@mail.gmail.com Reviewed-by: Hannes Reinecke Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_alua.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 37d06f993b761..1d9be771f3ee0 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) case SCSI_ACCESS_STATE_OPTIMAL: case SCSI_ACCESS_STATE_ACTIVE: case SCSI_ACCESS_STATE_LBA: - return BLK_STS_OK; case SCSI_ACCESS_STATE_TRANSITIONING: - return BLK_STS_AGAIN; + return BLK_STS_OK; default: req->rq_flags |= RQF_QUIET; return BLK_STS_IOERR; -- GitLab From e2c54b945864ec727c1590f6ac24a19b3866b76c Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Fri, 15 Apr 2022 12:42:29 +0000 Subject: [PATCH 0111/4335] scsi: qla2xxx: Fix missed DMA unmap for aborted commands [ Upstream commit 26f9ce53817a8fd84b69a73473a7de852a24c897 ] Aborting commands that have already been sent to the firmware can cause BUG in qlt_free_cmd(): BUG_ON(cmd->sg_mapped) For instance: - Command passes rdx_to_xfer state, maps sgl, sends to the firmware - Reset occurs, qla2xxx performs ISP error recovery, aborts the command - Target stack calls qlt_abort_cmd() and then qlt_free_cmd() - BUG_ON(cmd->sg_mapped) in qlt_free_cmd() occurs because sgl was not unmapped Thus, unmap sgl in qlt_abort_cmd() for commands with the aborted flag set. Link: https://lore.kernel.org/r/AS8PR10MB4952D545F84B6B1DFD39EC1E9DEE9@AS8PR10MB4952.EURPRD10.PROD.OUTLOOK.COM Reviewed-by: Himanshu Madhani Signed-off-by: Gleb Chesnokov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f5d32d830a9bf..ae5eaa4a9283a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3837,6 +3837,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: -- GitLab From 4bcc2ab96fce497ab9c7863dd051bdcf0a5018bf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Apr 2022 12:50:38 +0200 Subject: [PATCH 0112/4335] mac80211: fix rx reordering with non explicit / psmp ack policy [ Upstream commit 5e469ed9764d4722c59562da13120bd2dc6834c5 ] When the QoS ack policy was set to non explicit / psmp ack, frames are treated as not being part of a BA session, which causes extra latency on reordering. Fix this by only bypassing reordering for packets with no-ack policy Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220420105038.36443-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index eab6283b3479c..743e97ba352c8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1400,8 +1400,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ -- GitLab From 020fb19eae9cd398b4d85e2df6bc7a8bd4453d39 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Wed, 20 Apr 2022 04:13:21 +0000 Subject: [PATCH 0113/4335] nl80211: validate S1G channel width [ Upstream commit 5d087aa759eb82b8208411913f6c2158bd85abc0 ] Validate the S1G channel width input by user to ensure it matches that of the requested channel Signed-off-by: Kieran Frewen Signed-off-by: Bassem Dawood Link: https://lore.kernel.org/r/20220420041321.3788789-2-kieran.frewen@morsemicro.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fe9cade6b4fb2..9fae09e860e10 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3080,6 +3080,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (chandef->chan->band == NL80211_BAND_S1GHZ) { + /* User input error for channel width doesn't match channel */ + if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_CHANNEL_WIDTH], + "bad channel width"); + return -EINVAL; + } + } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); -- GitLab From 9e87c228be71c3f054f1ea9770b264ab577068e5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:39 +0200 Subject: [PATCH 0114/4335] selftests: add ping test with ping_group_range tuned [ Upstream commit e71b7f1f44d3d88c677769c85ef0171caf9fc89f ] The 'ping' utility is able to manage two kind of sockets (raw or icmp), depending on the sysctl ping_group_range. By default, ping_group_range is set to '1 0', which forces ping to use an ip raw socket. Let's replay the ping tests by allowing 'ping' to use the ip icmp socket. After the previous patch, ipv4 tests results are the same with both kinds of socket. For ipv6, there are a lot a new failures (the previous patch fixes only two cases). Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/fcnal-test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index aec9e784d0b46..91f54112167f1 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -803,10 +803,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -2324,10 +2330,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ -- GitLab From 92dc6278dec983f7dfc33f28bb929d5ca92087ac Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 4 May 2022 13:59:17 +0200 Subject: [PATCH 0115/4335] Revert "fbdev: Make fb_release() return -ENODEV if fbdev was unregistered" [ Upstream commit 135332f34ba2662bc1e32b5c612e06a8cc41a053 ] This reverts commit aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a. That commit attempted to fix a NULL pointer dereference, caused by the struct fb_info associated with a framebuffer device to not longer be valid when the file descriptor was closed. The issue was exposed by commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"), which added a new path that goes through the struct device removal instead of directly unregistering the fb. Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. This meant that due to this switch, the fb_info was now destroyed too early, while references still existed, while before it was simply leaked. The patch we're reverting here reinstated that leak, hence "fixed" the regression. But the proper solution is to fix the drivers to not release the fb_info too soon. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220504115917.758787-1-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmem.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 8e38a7a5cf2f5..0371ad233fdf2 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1436,10 +1436,7 @@ fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { - struct fb_info * const info = file_fb_info(file); - - if (!info) - return -ENODEV; + struct fb_info * const info = file->private_data; lock_fb_info(info); if (info->fbops->fb_release) -- GitLab From 4f631f9f9d08a7c6e47d2f19d866435fe70c3db5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 May 2022 00:04:13 +0200 Subject: [PATCH 0116/4335] fbdev: Prevent possible use-after-free in fb_release() [ Upstream commit 89bfd4017e58faaf70411555e7f508495114e90b ] Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. Doing that will destroy the fb_info too early, while references to it may still exist, leading to a use-after-free error. To prevent this, check the fb_info reference counter when attempting to kfree the data structure in framebuffer_release(). That will leak it but at least will prevent the mentioned error. Signed-off-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220505220413.365977-1-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbsysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index 65dae05fff8e6..ce699396d6bad 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info) { if (!info) return; + + if (WARN_ON(refcount_read(&info->count))) + return; + kfree(info->apertures); kfree(info); } -- GitLab From 7140149849d22a41156c12a25602918709dcb9f7 Mon Sep 17 00:00:00 2001 From: Lina Wang Date: Thu, 5 May 2022 13:48:49 +0800 Subject: [PATCH 0117/4335] net: fix wrong network header length [ Upstream commit cf3ab8d4a797960b4be20565abb3bcd227b18a68 ] When clatd starts with ebpf offloaing, and NETIF_F_GRO_FRAGLIST is enable, several skbs are gathered in skb_shinfo(skb)->frag_list. The first skb's ipv6 header will be changed to ipv4 after bpf_skb_proto_6_to_4, network_header\transport_header\mac_header have been updated as ipv4 acts, but other skbs in frag_list didnot update anything, just ipv6 packets. udp_queue_rcv_skb will call skb_segment_list to traverse other skbs in frag_list and make sure right udp payload is delivered to user space. Unfortunately, other skbs in frag_list who are still ipv6 packets are updated like the first skb and will have wrong transport header length. e.g.before bpf_skb_proto_6_to_4,the first skb and other skbs in frag_list has the same network_header(24)& transport_header(64), after bpf_skb_proto_6_to_4, ipv6 protocol has been changed to ipv4, the first skb's network_header is 44,transport_header is 64, other skbs in frag_list didnot change.After skb_segment_list, the other skbs in frag_list has different network_header(24) and transport_header(44), so there will be 20 bytes different from original,that is difference between ipv6 header and ipv4 header. Just change transport_header to be the same with original. Actually, there are two solutions to fix it, one is traversing all skbs and changing every skb header in bpf_skb_proto_6_to_4, the other is modifying frag_list skb's header in skb_segment_list. Considering efficiency, adopt the second one--- when the first skb and other skbs in frag_list has different network_header length, restore them to make sure right udp payload is delivered to user space. Signed-off-by: Lina Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e4badc189e37c..7ef0f5a8ab036 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3873,7 +3873,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; - int err; + int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3913,9 +3913,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); -- GitLab From dc5c5b74075f79f8dcfb8ca3a534f6789df5efe5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 May 2022 10:21:38 +0200 Subject: [PATCH 0118/4335] nl80211: fix locking in nl80211_set_tx_bitrate_mask() [ Upstream commit f971e1887fdb3ab500c9bebf4b98f62d49a20655 ] This accesses the wdev's chandef etc., so cannot safely be used without holding the lock. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220506102136.06b7205419e6.I2a87c05fbd8bc5e565e84d190d4cfd2e92695a90@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9fae09e860e10..7c65ad17bf50a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11341,18 +11341,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct cfg80211_bitrate_mask mask; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; + wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true); if (err) - return err; + goto out; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); +out: + wdev_unlock(wdev); + return err; } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) -- GitLab From b076fa169465024ba2abd22300a1c814452bc484 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 17:42:50 +0800 Subject: [PATCH 0119/4335] ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one() [ Upstream commit 51ca86b4c9c7c75f5630fa0dbe5f8f0bd98e3c3e ] Fix the missing pci_disable_device() before return from tulip_init_one() in the error handling case. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506094250.3630615-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/dec/tulip/tulip_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index fcedd733bacbf..834a3f8c80da0 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1398,8 +1398,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1778,6 +1780,7 @@ err_out_free_res: err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } -- GitLab From fd721da2df7a4b6db80ed4ac99b636a2cbb143aa Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 May 2022 11:13:16 +0800 Subject: [PATCH 0120/4335] net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe() [ Upstream commit 0807ce0b010418a191e0e4009803b2d74c3245d5 ] Switch to using pcim_enable_device() to avoid missing pci_disable_device(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220510031316.1780409-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index fcf17d8a0494b..644bb54f5f020 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) -- GitLab From d955f45d1a56638c8d5cb6d7e77df73382916aaa Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:23 -0700 Subject: [PATCH 0121/4335] net: atlantic: fix "frag[0] not initialized" [ Upstream commit 62e0ae0f4020250f961cf8d0103a4621be74e077 ] In aq_ring_rx_clean(), if buff->is_eop is not set AND buff->len < AQ_CFG_RX_HDR_SIZE, then hdr_len remains equal to buff->len and skb_add_rx_frag(xxx, *0*, ...) is not called. The loop following this code starts calling skb_add_rx_frag() starting with i=1 and thus frag[0] is never initialized. Since i is initialized to zero at the top of the primary loop, we can just reference and post-increment i instead of hardcoding the 0 when calling skb_add_rx_frag() the first time. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 72f8751784c31..7cf5a48e9a7d8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -445,7 +445,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, ALIGN(hdr_len, sizeof(long))); if (buff->len - hdr_len > 0) { - skb_add_rx_frag(skb, 0, buff->rxdata.page, + skb_add_rx_frag(skb, i++, buff->rxdata.page, buff->rxdata.pg_off + hdr_len, buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX); @@ -454,7 +454,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; - i = 1U; do { next_ = buff_->next; buff_ = &self->buff_ring[next_]; -- GitLab From 2d1b336ffb77125661c4ca1020a6844361d30e38 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:24 -0700 Subject: [PATCH 0122/4335] net: atlantic: reduce scope of is_rsc_complete [ Upstream commit 79784d77ebbd3ec516b7a5ce555d979fb7946202 ] Don't defer handling the err case outside the loop. That's pointless. And since is_rsc_complete is only used inside this loop, declare it inside the loop to reduce it's scope. Signed-off-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 7cf5a48e9a7d8..339efdfb1d49a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -345,7 +345,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget) { struct net_device *ndev = aq_nic_get_ndev(self->aq_nic); - bool is_rsc_completed = true; int err = 0; for (; (self->sw_head != self->hw_head) && budget; @@ -365,6 +364,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + bool is_rsc_completed = true; + if (buff_->next >= self->size) { err = -EIO; goto err_exit; @@ -376,18 +377,16 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) - break; + if (unlikely(!is_rsc_completed)) { + err = 0; + goto err_exit; + } buff->is_error |= buff_->is_error; buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); - if (!is_rsc_completed) { - err = 0; - goto err_exit; - } if (buff->is_error || (buff->is_lro && buff->is_cso_err)) { buff_ = buff; -- GitLab From 948ddbdc56636773401f2cb9c7a932eb9c43ccfd Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:25 -0700 Subject: [PATCH 0123/4335] net: atlantic: add check for MAX_SKB_FRAGS [ Upstream commit 6aecbba12b5c90b26dc062af3b9de8c4b3a2f19f ] Enforce that the CPU can not get stuck in an infinite loop. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 339efdfb1d49a..e9c6f1fa0b1a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -362,6 +362,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, continue; if (!buff->is_eop) { + unsigned int frag_cnt = 0U; buff_ = buff; do { bool is_rsc_completed = true; @@ -370,6 +371,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = -EIO; goto err_exit; } + + frag_cnt++; next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -377,7 +380,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) { + if (unlikely(!is_rsc_completed) || + frag_cnt > MAX_SKB_FRAGS) { err = 0; goto err_exit; } -- GitLab From 47840af397bef4ee5c27dbad771f879a267fff38 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:26 -0700 Subject: [PATCH 0124/4335] net: atlantic: verify hw_head_ lies within TX buffer ring [ Upstream commit 2120b7f4d128433ad8c5f503a9584deba0684901 ] Bounds check hw_head index provided by NIC to verify it lies within the TX buffer ring. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 9f1b15077e7d6..45c17c585d743 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); -- GitLab From 149a25b82ca991c65d989a0148c3ab5d765e5272 Mon Sep 17 00:00:00 2001 From: Shreyas K K Date: Thu, 12 May 2022 16:31:34 +0530 Subject: [PATCH 0125/4335] arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs [ Upstream commit 51f559d66527e238f9a5f82027bff499784d4eac ] Add KRYO4XX gold/big cores to the list of CPUs that need the repeat TLBI workaround. Apply this to the affected KRYO4XX cores (rcpe to rfpe). The variant and revision bits are implementation defined and are different from the their Cortex CPU counterparts on which they are based on, i.e., (r0p0 to r3p0) is equivalent to (rcpe to rfpe). Signed-off-by: Shreyas K K Reviewed-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/20220512110134.12179-1-quic_shrekk@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- Documentation/arm64/silicon-errata.rst | 3 +++ arch/arm64/kernel/cpu_errata.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d410a47ffa57a..7c1750bcc5bd8 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -163,6 +163,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 | ++----------------+-----------------+-----------------+-----------------------------+ + +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a33d7b8f3b935..c67c19d701597 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif {}, -- GitLab From 331c57431f7a134961374dbbbe3d57f1e9553607 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 18 May 2022 14:28:32 -0700 Subject: [PATCH 0126/4335] Input: ili210x - fix reset timing commit e4920d42ce0e9c8aafb7f64b6d9d4ae02161e51e upstream. According to Ilitek "231x & ILI251x Programming Guide" Version: 2.30 "2.1. Power Sequence", "T4 Chip Reset and discharge time" is minimum 10ms and "T2 Chip initial time" is maximum 150ms. Adjust the reset timings such that T4 is 12ms and T2 is 160ms to fit those figures. This prevents sporadic touch controller start up failures when some systems with at least ILI251x controller boot, without this patch the systems sometimes fail to communicate with the touch controller. Fixes: 201f3c803544c ("Input: ili210x - add reset GPIO support") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220518204901.93534-1-marex@denx.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/ili210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 30576a5f2f045..f437eefec94ad 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -420,9 +420,9 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(50, 100); + usleep_range(12000, 15000); gpiod_set_value_cansleep(reset_gpio, 0); - msleep(100); + msleep(160); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); -- GitLab From a2797b550755c0f37df633dfa83e000470e64f8b Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:28 -0700 Subject: [PATCH 0127/4335] dt-bindings: pinctrl: aspeed-g6: remove FWQSPID group commit a29c96a4053dc3c1d39353b61089882f81c6b23d upstream. FWQSPID is not a group of FWSPID so remove it. Fixes: 7488838f2315 ("dt-bindings: pinctrl: aspeed: Document AST2600 pinmux") Signed-off-by: Jae Hyun Yoo Acked-by: Rob Herring Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-4-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index ad2866c997383..fcd82df3aebbd 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -58,7 +58,7 @@ patternProperties: $ref: "/schemas/types.yaml#/definitions/string" enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, - EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, + EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, -- GitLab From 649178c0493e4080b2b226b0ef9fa2d834b1b412 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 7 Jan 2022 15:30:03 +0800 Subject: [PATCH 0128/4335] mt76: mt7921e: fix possible probe failure after reboot commit 602cc0c9618a819ab00ea3c9400742a0ca318380 upstream. It doesn't guarantee the mt7921e gets started with ASPM L0 after each machine reboot on every platform. If mt7921e gets started with not ASPM L0, it would be possible that the driver encounters time to time failure in mt7921_pci_probe, like a weird chip identifier is read [ 215.514503] mt7921e 0000:05:00.0: ASIC revision: feed0000 [ 216.604741] mt7921e: probe of 0000:05:00.0 failed with error -110 or failing to init hardware because the driver is not allowed to access the register until the device is in ASPM L0 state. So, we call __mt7921e_mcu_drv_pmctrl in early mt7921_pci_probe to force the device to bring back to the L0 state for we can safely access registers in any case. In the patch, we move all functions from dma.c to pci.c and register mt76 bus operation earilier, that is the __mt7921e_mcu_drv_pmctrl depends on. Fixes: bf3747ae2e25 ("mt76: mt7921: enable aspm by default") Reported-by: Kai-Chuan Hsieh Co-developed-by: Deren Wu Signed-off-by: Deren Wu Signed-off-by: Sean Wang Signed-off-by: Felix Fietkau Signed-off-by: Joakim Tjernlund Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/mediatek/mt76/mt7921/dma.c | 115 ----------------- .../net/wireless/mediatek/mt76/mt7921/mcu.c | 20 +-- .../net/wireless/mediatek/mt76/mt7921/pci.c | 121 ++++++++++++++++++ 3 files changed, 131 insertions(+), 125 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index 7d7d43a5422f8..93d0cc1827d26 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -118,109 +118,6 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4)); } -static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) -{ - static const struct { - u32 phys; - u32 mapped; - u32 size; - } fixed_map[] = { - { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */ - { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ - { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ - { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ - { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ - { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ - { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */ - { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ - { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ - { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ - { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */ - { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ - { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ - { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ - { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ - { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ - { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ - { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ - { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ - { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ - { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ - { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ - { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ - { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ - { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ - { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ - { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ - { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ - { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ - { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ - }; - int i; - - if (addr < 0x100000) - return addr; - - for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { - u32 ofs; - - if (addr < fixed_map[i].phys) - continue; - - ofs = addr - fixed_map[i].phys; - if (ofs > fixed_map[i].size) - continue; - - return fixed_map[i].mapped + ofs; - } - - if ((addr >= 0x18000000 && addr < 0x18c00000) || - (addr >= 0x70000000 && addr < 0x78000000) || - (addr >= 0x7c000000 && addr < 0x7c400000)) - return mt7921_reg_map_l1(dev, addr); - - dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", - addr); - - return 0; -} - -static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rr(mdev, addr); -} - -static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - dev->bus_ops->wr(mdev, addr, val); -} - -static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rmw(mdev, addr, mask, val); -} - static int mt7921_dma_disable(struct mt7921_dev *dev, bool force) { if (force) { @@ -380,20 +277,8 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev) int mt7921_dma_init(struct mt7921_dev *dev) { - struct mt76_bus_ops *bus_ops; int ret; - dev->bus_ops = dev->mt76.bus; - bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), - GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; - - bus_ops->rr = mt7921_rr; - bus_ops->wr = mt7921_wr; - bus_ops->rmw = mt7921_rmw; - dev->mt76.bus = bus_ops; - mt76_dma_attach(&dev->mt76); ret = mt7921_dma_disable(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 506a1909ce6d5..4119f8efd896b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1306,8 +1306,6 @@ int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta, int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) { - struct mt76_phy *mphy = &dev->mt76.phy; - struct mt76_connac_pm *pm = &dev->pm; int i, err = 0; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { @@ -1320,16 +1318,8 @@ int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "driver own failed\n"); err = -EIO; - goto out; } - mt7921_wpdma_reinit_cond(dev); - clear_bit(MT76_STATE_PM, &mphy->state); - - pm->stats.last_wake_event = jiffies; - pm->stats.doze_time += pm->stats.last_wake_event - - pm->stats.last_doze_event; -out: return err; } @@ -1345,6 +1335,16 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) goto out; err = __mt7921_mcu_drv_pmctrl(dev); + if (err < 0) + goto out; + + mt7921_wpdma_reinit_cond(dev); + clear_bit(MT76_STATE_PM, &mphy->state); + + pm->stats.last_wake_event = jiffies; + pm->stats.doze_time += pm->stats.last_wake_event - + pm->stats.last_doze_event; + out: mutex_unlock(&pm->mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index c3905bcab3604..7d9b23a002388 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -88,6 +88,110 @@ static void mt7921_irq_tasklet(unsigned long data) napi_schedule(&dev->mt76.napi[MT_RXQ_MAIN]); } +static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) +{ + static const struct { + u32 phys; + u32 mapped; + u32 size; + } fixed_map[] = { + { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */ + { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ + { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ + { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ + { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ + { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ + { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ + { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */ + { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ + { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ + { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ + { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ + { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */ + { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ + { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ + { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ + { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ + { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ + { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ + { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ + { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ + { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ + { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ + { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ + { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ + { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ + { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ + { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ + }; + int i; + + if (addr < 0x100000) + return addr; + + for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { + u32 ofs; + + if (addr < fixed_map[i].phys) + continue; + + ofs = addr - fixed_map[i].phys; + if (ofs > fixed_map[i].size) + continue; + + return fixed_map[i].mapped + ofs; + } + + if ((addr >= 0x18000000 && addr < 0x18c00000) || + (addr >= 0x70000000 && addr < 0x78000000) || + (addr >= 0x7c000000 && addr < 0x7c400000)) + return mt7921_reg_map_l1(dev, addr); + + dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", + addr); + + return 0; +} + +static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rr(mdev, addr); +} + +static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + dev->bus_ops->wr(mdev, addr, val); +} + +static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rmw(mdev, addr, mask, val); +} + + static int mt7921_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -110,6 +214,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, .sta_remove = mt7921_mac_sta_remove, .update_survey = mt7921_update_channel, }; + struct mt76_bus_ops *bus_ops; struct mt7921_dev *dev; struct mt76_dev *mdev; int ret; @@ -145,6 +250,22 @@ static int mt7921_pci_probe(struct pci_dev *pdev, mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev); + + dev->bus_ops = dev->mt76.bus; + bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), + GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + + bus_ops->rr = mt7921_rr; + bus_ops->wr = mt7921_wr; + bus_ops->rmw = mt7921_rmw; + dev->mt76.bus = bus_ops; + + ret = __mt7921_mcu_drv_pmctrl(dev); + if (ret) + return ret; + mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_err(mdev->dev, "ASIC revision: %04x\n", mdev->rev); -- GitLab From 69c5d307dce1560fafcb852f39d7a1bf5e266641 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 23 May 2022 19:11:02 +0100 Subject: [PATCH 0129/4335] lockdown: also lock down previous kgdb use commit eadb2f47a3ced5c64b23b90fd2a3463f63726066 upstream. KGDB and KDB allow read and write access to kernel memory, and thus should be restricted during lockdown. An attacker with access to a serial port (for example, via a hypervisor console, which some cloud vendors provide over the network) could trigger the debugger so it is important that the debugger respect the lockdown mode when/if it is triggered. Fix this by integrating lockdown into kdb's existing permissions mechanism. Unfortunately kgdb does not have any permissions mechanism (although it certainly could be added later) so, for now, kgdb is simply and brutally disabled by immediately exiting the gdb stub without taking any action. For lockdowns established early in the boot (e.g. the normal case) then this should be fine but on systems where kgdb has set breakpoints before the lockdown is enacted than "bad things" will happen. CVE: CVE-2022-21499 Co-developed-by: Stephen Brennan Signed-off-by: Stephen Brennan Reviewed-by: Douglas Anderson Signed-off-by: Daniel Thompson Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/security.h | 2 ++ kernel/debug/debug_core.c | 24 ++++++++++++++ kernel/debug/kdb/kdb_main.c | 62 +++++++++++++++++++++++++++++++++++-- security/security.c | 2 ++ 4 files changed, 87 insertions(+), 3 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 46a02ce34d00b..da184e7b361f4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,10 +121,12 @@ enum lockdown_reason { LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, LOCKDOWN_BPF_WRITE_USER, + LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, LOCKDOWN_BPF_READ_KERNEL, + LOCKDOWN_DBG_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index da06a5553835b..7beceb447211d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -752,6 +753,29 @@ cpu_master_loop: continue; kgdb_connected = 0; } else { + /* + * This is a brutal way to interfere with the debugger + * and prevent gdb being used to poke at kernel memory. + * This could cause trouble if lockdown is applied when + * there is already an active gdb session. For now the + * answer is simply "don't do that". Typically lockdown + * *will* be applied before the debug core gets started + * so only developers using kgdb for fairly advanced + * early kernel debug can be biten by this. Hopefully + * they are sophisticated enough to take care of + * themselves, especially with help from the lockdown + * message printed on the console! + */ + if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) { + if (IS_ENABLED(CONFIG_KGDB_KDB)) { + /* Switch back to kdb if possible... */ + dbg_kdb_mode = 1; + continue; + } else { + /* ... otherwise just bail */ + break; + } + } error = gdb_serial_stub(ks); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 0852a537dad4c..ead4da9471270 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kdb_private.h" #undef MODULE_PARAM_PREFIX @@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu) } /* - * Check whether the flags of the current command and the permissions - * of the kdb console has allow a command to be run. + * Update the permissions flags (kdb_cmd_enabled) to match the + * current lockdown state. + * + * Within this function the calls to security_locked_down() are "lazy". We + * avoid calling them if the current value of kdb_cmd_enabled already excludes + * flags that might be subject to lockdown. Additionally we deliberately check + * the lockdown flags independently (even though read lockdown implies write + * lockdown) since that results in both simpler code and clearer messages to + * the user on first-time debugger entry. + * + * The permission masks during a read+write lockdown permits the following + * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE). + * + * The INSPECT commands are not blocked during lockdown because they are + * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes + * forcing them to have no arguments) and lsmod. These commands do expose + * some kernel state but do not allow the developer seated at the console to + * choose what state is reported. SIGNAL and REBOOT should not be controversial, + * given these are allowed for root during lockdown already. + */ +static void kdb_check_for_lockdown(void) +{ + const int write_flags = KDB_ENABLE_MEM_WRITE | + KDB_ENABLE_REG_WRITE | + KDB_ENABLE_FLOW_CTRL; + const int read_flags = KDB_ENABLE_MEM_READ | + KDB_ENABLE_REG_READ; + + bool need_to_lockdown_write = false; + bool need_to_lockdown_read = false; + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags)) + need_to_lockdown_write = + security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL); + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags)) + need_to_lockdown_read = + security_locked_down(LOCKDOWN_DBG_READ_KERNEL); + + /* De-compose KDB_ENABLE_ALL if required */ + if (need_to_lockdown_write || need_to_lockdown_read) + if (kdb_cmd_enabled & KDB_ENABLE_ALL) + kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL; + + if (need_to_lockdown_write) + kdb_cmd_enabled &= ~write_flags; + + if (need_to_lockdown_read) + kdb_cmd_enabled &= ~read_flags; +} + +/* + * Check whether the flags of the current command, the permissions of the kdb + * console and the lockdown state allow a command to be run. */ -static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, +static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, bool no_args) { /* permissions comes from userspace so needs massaging slightly */ @@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_curr_task(raw_smp_processor_id()); KDB_DEBUG_STATE("kdb_local 1", reason); + + kdb_check_for_lockdown(); + kdb_go_count = 0; if (reason == KDB_REASON_DEBUG) { /* special case below */ diff --git a/security/security.c b/security/security.c index da631339e9693..7b9f9d3fffe52 100644 --- a/security/security.c +++ b/security/security.c @@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", + [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM", + [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", -- GitLab From e7647ddf0ac50bac6ec6cc954ccaa95abac7bf69 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 14 May 2022 10:31:47 +0800 Subject: [PATCH 0130/4335] i2c: mt7621: fix missing clk_disable_unprepare() on error in mtk_i2c_probe() [ Upstream commit a2537c98a8a3b57002e54a262d180b9490bc7190 ] Fix the missing clk_disable_unprepare() before return from mtk_i2c_probe() in the error handling case. Fixes: d04913ec5f89 ("i2c: mt7621: Add MediaTek MT7621/7628/7688 I2C driver") Signed-off-by: Yang Yingliang Reviewed-by: Stefan Roese Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-mt7621.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 45fe4a7fe0c03..901f0fb04fee4 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) if (i2c->bus_freq == 0) { dev_warn(i2c->dev, "clock-frequency 0 not supported\n"); - return -EINVAL; + ret = -EINVAL; + goto err_disable_clk; } adap = &i2c->adap; @@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret < 0) - return ret; + goto err_disable_clk; dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000); + return 0; + +err_disable_clk: + clk_disable_unprepare(i2c->clk); + return ret; } -- GitLab From 94bf8bfb009fad247d02f12e4c443411c8445412 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:18:28 +0100 Subject: [PATCH 0131/4335] afs: Fix afs_getattr() to refetch file status if callback break occurred [ Upstream commit 2aeb8c86d49967552394d5e723f87454cb53f501 ] If a callback break occurs (change notification), afs_getattr() needs to issue an FS.FetchStatus RPC operation to update the status of the file being examined by the stat-family of system calls. Fix afs_getattr() to do this if AFS_VNODE_CB_PROMISED has been cleared on a vnode by a callback break. Skip this if AT_STATX_DONT_SYNC is set. This can be tested by appending to a file on one AFS client and then using "stat -L" to examine its length on a machine running kafs. This can also be watched through tracing on the kafs machine. The callback break is seen: kworker/1:1-46 [001] ..... 978.910812: afs_cb_call: c=0000005f YFSCB.CallBack kworker/1:1-46 [001] ...1. 978.910829: afs_cb_break: 100058:23b4c:242d2c2 b=2 s=1 break-cb kworker/1:1-46 [001] ..... 978.911062: afs_call_done: c=0000005f ret=0 ab=0 [0000000082994ead] And then the stat command generated no traffic if unpatched, but with this change a call to fetch the status can be observed: stat-4471 [000] ..... 986.744122: afs_make_fs_call: c=000000ab 100058:023b4c:242d2c2 YFS.FetchStatus stat-4471 [000] ..... 986.745578: afs_call_done: c=000000ab ret=0 ab=0 [0000000087fc8c84] Fixes: 08e0e7c82eea ("[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.") Reported-by: Markus Suvanto Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Tested-by: Markus Suvanto Tested-by: kafs-testing+fedora34_64checkkafs-build-496@auristor.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=216010 Link: https://lore.kernel.org/r/165308359800.162686.14122417881564420962.stgit@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 8fcffea2daf50..a47666ba48f56 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -728,10 +728,22 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (!(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(&init_user_ns, inode, stat); -- GitLab From c5871dddc145ac70916dac7c3f8366aba55ed9f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 May 2022 09:57:37 +0200 Subject: [PATCH 0132/4335] Linux 5.15.42 Link: https://lore.kernel.org/r/20220523165823.492309987@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Fox Chen Tested-by: Sudip Mukherjee Tested-by: Ron Economos Tested-by: Guenter Roeck Tested-by: Khalid Masum Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c940e6542c8fd..6ad7aabaa7d67 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 41 +SUBLEVEL = 42 EXTRAVERSION = NAME = Trick or Treat -- GitLab From d12cd0bf5d262887eeaa407744618255bde76565 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 17 May 2022 11:02:12 -0700 Subject: [PATCH 0133/4335] mptcp: Do TCP fallback on early DSS checksum failure commit ae66fb2ba6c3dcaf8b9612b65aa949a1a4bed150 upstream. RFC 8684 section 3.7 describes several opportunities for a MPTCP connection to "fall back" to regular TCP early in the connection process, before it has been confirmed that MPTCP options can be successfully propagated on all SYN, SYN/ACK, and data packets. If a peer acknowledges the first received data packet with a regular TCP header (no MPTCP options), fallback is allowed. If the recipient of that first data packet finds a MPTCP DSS checksum error, this provides an opportunity to fail gracefully with a TCP fallback rather than resetting the connection (as might happen if a checksum failure were detected later). This commit modifies the checksum failure code to attempt fallback on the initial subflow of a MPTCP connection, only if it's a failure in the first data mapping. In cases where the peer initiates the connection, requests checksums, is the first to send data, and the peer is sending incorrect checksums (see https://github.com/multipath-tcp/mptcp_net-next/issues/275), this allows the connection to proceed as TCP rather than reset. Fixes: dd8bcd1768ff ("mptcp: validate the data checksum") Acked-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller [mathew.j.martineau: backport: Resolved bitfield conflict in protocol.h] Signed-off-by: Mat Martineau Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 72a259a74b575..e193b710b471a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -436,7 +436,8 @@ struct mptcp_subflow_context { rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ - stale : 1; /* unable to snd/rcv data, do not use for xmit */ + stale : 1, /* unable to snd/rcv data, do not use for xmit */ + valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9c7deffe7cb6e..5ef9013b94c74 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -913,11 +913,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + if (subflow->mp_join || subflow->valid_csum_seen) { + subflow->send_mp_fail = 1; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); + } return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; } + subflow->valid_csum_seen = 1; return MAPPING_OK; } @@ -1099,6 +1102,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss } } +static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) +{ + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + if (subflow->mp_join) + return false; + else if (READ_ONCE(msk->csum_enabled)) + return !subflow->valid_csum_seen; + else + return !subflow->fully_established; +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1176,7 +1191,7 @@ fallback: return true; } - if (subflow->mp_join || subflow->fully_established) { + if (!subflow_can_fallback(subflow)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ -- GitLab From 0e5bb338bf471ec46924f744c4301751bab8793a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 May 2022 14:42:07 +0200 Subject: [PATCH 0134/4335] Linux 5.15.43 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6ad7aabaa7d67..6192e6be49c36 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 42 +SUBLEVEL = 43 EXTRAVERSION = NAME = Trick or Treat -- GitLab From f82ccfa4835b84e6e74c68e825d8b2f32fe249b8 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 9 May 2022 18:50:20 +0530 Subject: [PATCH 0135/4335] HID: amd_sfh: Add support for sensor discovery commit b5d7f43e97dabfa04a4be5ff027ce7da119332be upstream. Sensor discovery status fails in case of broken sensors or platform not supported. Hence disable driver on failure of sensor discovery. Signed-off-by: Mario Limonciello Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 11 +++++++++++ drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 7 +++++++ drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 4 ++++ 3 files changed, 22 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 840fd075c56f1..6284db50ec9bf 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -226,6 +226,17 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) dev_dbg(dev, "sid 0x%x status 0x%x\n", cl_data->sensor_idx[i], cl_data->sensor_sts[i]); } + if (privdata->mp2_ops->discovery_status && + privdata->mp2_ops->discovery_status(privdata) == 0) { + amd_sfh_hid_client_deinit(privdata); + for (i = 0; i < cl_data->num_hid_devices; i++) { + devm_kfree(dev, cl_data->feature_report[i]); + devm_kfree(dev, in_data->input_report[i]); + devm_kfree(dev, cl_data->report_descr[i]); + } + dev_warn(dev, "Failed to discover, sensors not enabled\n"); + return -EOPNOTSUPP; + } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 561bb27f42b10..ae8f1f2536e94 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -126,6 +126,12 @@ static int amd_sfh_irq_init_v2(struct amd_mp2_dev *privdata) return 0; } +static int amd_sfh_dis_sts_v2(struct amd_mp2_dev *privdata) +{ + return (readl(privdata->mmio + AMD_P2C_MSG(1)) & + SENSOR_DISCOVERY_STATUS_MASK) >> SENSOR_DISCOVERY_STATUS_SHIFT; +} + void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info) { union sfh_cmd_param cmd_param; @@ -241,6 +247,7 @@ static const struct amd_mp2_ops amd_sfh_ops_v2 = { .response = amd_sfh_wait_response_v2, .clear_intr = amd_sfh_clear_intr_v2, .init_intr = amd_sfh_irq_init_v2, + .discovery_status = amd_sfh_dis_sts_v2, }; static const struct amd_mp2_ops amd_sfh_ops = { diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index 00fc083dc1239..2d3203d3daeb3 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -38,6 +38,9 @@ #define AMD_SFH_IDLE_LOOP 200 +#define SENSOR_DISCOVERY_STATUS_MASK GENMASK(5, 3) +#define SENSOR_DISCOVERY_STATUS_SHIFT 3 + /* SFH Command register */ union sfh_cmd_base { u32 ul; @@ -142,5 +145,6 @@ struct amd_mp2_ops { int (*response)(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts); void (*clear_intr)(struct amd_mp2_dev *privdata); int (*init_intr)(struct amd_mp2_dev *privdata); + int (*discovery_status)(struct amd_mp2_dev *privdata); }; #endif -- GitLab From acd12d16528152b32fa09be2c5ef95047f69af05 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 May 2022 13:48:11 -0400 Subject: [PATCH 0136/4335] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID commit 9f46c187e2e680ecd9de7983e4d081c3391acc76 upstream. With shadow paging enabled, the INVPCID instruction results in a call to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the invlpg callback is not set and the result is a NULL pointer dereference. Fix it trivially by checking for mmu->invlpg before every call. There are other possibilities: - check for CR0.PG, because KVM (like all Intel processors after P5) flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a nop with paging disabled - check for EFER.LMA, because KVM syncs and flushes when switching MMU contexts outside of 64-bit mode All of these are tricky, go for the simple solution. This is CVE-2022-1789. Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini [fix conflict due to missing b9e5603c2a3accbadfec570ac501a54431a6bdba] Signed-off-by: Vegard Nossum Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c5fd00945e752..3417405077adf 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5396,14 +5396,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) uint i; if (pcid == kvm_get_active_pcid(vcpu)) { - mmu->invlpg(vcpu, gva, mmu->root_hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->root_hpa); tlb_flush = true; } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } } -- GitLab From bf5a3c51e9f7af02f6e0fff01751b623c68f2263 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 25 May 2022 09:19:53 +0200 Subject: [PATCH 0137/4335] ice: fix crash at allocation failure Fix a crash in the zero-copy driver that occurs when it fails to allocate buffers from user-space. This crash can easily be triggered by a malicious program that does not provide any buffers in the fill ring for the kernel to use. Note that this bug does not exist in upstream since the batched buffer allocation interface got introduced in 5.16 and replaced this code. Reported-by: Jeff Shaw Tested-by: Jeff Shaw Signed-off-by: Magnus Karlsson Acked-by: Maciej Fijalkowski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2b1873061912d..5581747947e57 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -378,7 +378,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) do { *xdp = xsk_buff_alloc(rx_ring->xsk_pool); - if (!xdp) { + if (!*xdp) { ok = false; break; } -- GitLab From 0da5349e225490bfb7c1297301c8c0ad38f61504 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 7 Apr 2022 11:51:20 +0100 Subject: [PATCH 0138/4335] ACPI: sysfs: Fix BERT error region memory mapping commit 1bbc21785b7336619fb6a67f1fff5afdaf229acc upstream. Currently the sysfs interface maps the BERT error region as "memory" (through acpi_os_map_memory()) in order to copy the error records into memory buffers through memory operations (eg memory_read_from_buffer()). The OS system cannot detect whether the BERT error region is part of system RAM or it is "device memory" (eg BMC memory) and therefore it cannot detect which memory attributes the bus to memory support (and corresponding kernel mapping, unless firmware provides the required information). The acpi_os_map_memory() arch backend implementation determines the mapping attributes. On arm64, if the BERT error region is not present in the EFI memory map, the error region is mapped as device-nGnRnE; this triggers alignment faults since memcpy unaligned accesses are not allowed in device-nGnRnE regions. The ACPI sysfs code cannot therefore map by default the BERT error region with memory semantics but should use a safer default. Change the sysfs code to map the BERT error region as MMIO (through acpi_os_map_iomem()) and use the memcpy_fromio() interface to read the error region into the kernel buffer. Link: https://lore.kernel.org/linux-arm-kernel/31ffe8fc-f5ee-2858-26c5-0fd8bdd68702@arm.com Link: https://lore.kernel.org/linux-acpi/CAJZ5v0g+OVbhuUUDrLUCfX_mVqY_e8ubgLTU98=jfjTeb4t+Pw@mail.gmail.com Signed-off-by: Lorenzo Pieralisi Tested-by: Veronika Kabatova Tested-by: Aristeu Rozanski Acked-by: Ard Biesheuvel Signed-off-by: Rafael J. Wysocki Cc: dann frazier Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sysfs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 00c0ebaab29f7..6e23b76aef5dc 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -415,19 +415,30 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct acpi_data_attr *data_attr; - void *base; - ssize_t rc; + void __iomem *base; + ssize_t size; data_attr = container_of(bin_attr, struct acpi_data_attr, attr); + size = data_attr->attr.size; + + if (offset < 0) + return -EINVAL; + + if (offset >= size) + return 0; - base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size); + if (count > size - offset) + count = size - offset; + + base = acpi_os_map_iomem(data_attr->addr, size); if (!base) return -ENOMEM; - rc = memory_read_from_buffer(buf, count, &offset, base, - data_attr->attr.size); - acpi_os_unmap_memory(base, data_attr->attr.size); - return rc; + memcpy_fromio(buf, base + offset, count); + + acpi_os_unmap_iomem(base, size); + + return count; } static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr) -- GitLab From 12d7163380a242e719fa0601cc074a14716cc54d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 30 Nov 2021 13:43:15 -0500 Subject: [PATCH 0139/4335] MAINTAINERS: co-maintain random.c commit 58e1100fdc5990b0cc0d4beaf2562a92e621ac7d upstream. random.c is a bit understaffed, and folks want more prompt reviews. I've got the crypto background and the interest to do these reviews, and have authored parts of the file already. Cc: Theodore Ts'o Cc: Greg Kroah-Hartman Signed-off-by: Jason A. Donenfeld Signed-off-by: Linus Torvalds Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c8103e57a70be..9b581b82f915f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15720,6 +15720,7 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" +M: Jason A. Donenfeld S: Maintained F: drivers/char/random.c -- GitLab From 830ecbae41a73ed5503c19c8d789f6bb758d843c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 25 Dec 2021 01:50:07 +0100 Subject: [PATCH 0140/4335] MAINTAINERS: add git tree for random.c commit 9bafaa9375cbf892033f188d8cb624ae328754b5 upstream. This is handy not just for humans, but also so that the 0-day bot can automatically test posted mailing list patches against the right tree. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9b581b82f915f..942e0b173f2cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15721,6 +15721,7 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" M: Jason A. Donenfeld +T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git S: Maintained F: drivers/char/random.c -- GitLab From caba66ec322e22a65c388ae81b23942c7f51f982 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 22 Dec 2021 14:56:58 +0100 Subject: [PATCH 0141/4335] lib/crypto: blake2s: include as built-in commit 6048fdcc5f269c7f31d774c295ce59081b36e6f9 upstream. In preparation for using blake2s in the RNG, we change the way that it is wired-in to the build system. Instead of using ifdefs to select the right symbol, we use weak symbols. And because ARM doesn't need the generic implementation, we make the generic one default only if an arch library doesn't need it already, and then have arch libraries that do need it opt-in. So that the arch libraries can remain tristate rather than bool, we then split the shash part from the glue code. Acked-by: Herbert Xu Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/Makefile | 4 +- arch/arm/crypto/blake2s-core.S | 8 ++-- arch/arm/crypto/blake2s-glue.c | 73 +---------------------------- arch/arm/crypto/blake2s-shash.c | 75 ++++++++++++++++++++++++++++++ arch/x86/crypto/Makefile | 4 +- arch/x86/crypto/blake2s-glue.c | 68 +++------------------------ arch/x86/crypto/blake2s-shash.c | 77 +++++++++++++++++++++++++++++++ crypto/Kconfig | 3 +- drivers/net/Kconfig | 1 - include/crypto/internal/blake2s.h | 6 +-- lib/crypto/Kconfig | 23 +++------ lib/crypto/Makefile | 9 ++-- lib/crypto/blake2s-generic.c | 6 ++- lib/crypto/blake2s.c | 6 --- 14 files changed, 189 insertions(+), 174 deletions(-) create mode 100644 arch/arm/crypto/blake2s-shash.c create mode 100644 arch/x86/crypto/blake2s-shash.c diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index eafa898ba6a73..0274f81cc8ea0 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o @@ -31,7 +32,8 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) -blake2s-arm-y := blake2s-core.o blake2s-glue.o +blake2s-arm-y := blake2s-shash.o +libblake2s-arm-y:= blake2s-core.o blake2s-glue.o blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S index 86345751bbf3a..df40e46601f10 100644 --- a/arch/arm/crypto/blake2s-core.S +++ b/arch/arm/crypto/blake2s-core.S @@ -167,8 +167,8 @@ .endm // -// void blake2s_compress_arch(struct blake2s_state *state, -// const u8 *block, size_t nblocks, u32 inc); +// void blake2s_compress(struct blake2s_state *state, +// const u8 *block, size_t nblocks, u32 inc); // // Only the first three fields of struct blake2s_state are used: // u32 h[8]; (inout) @@ -176,7 +176,7 @@ // u32 f[2]; (in) // .align 5 -ENTRY(blake2s_compress_arch) +ENTRY(blake2s_compress) push {r0-r2,r4-r11,lr} // keep this an even number .Lnext_block: @@ -303,4 +303,4 @@ ENTRY(blake2s_compress_arch) str r3, [r12], #4 bne 1b b .Lcopy_block_done -ENDPROC(blake2s_compress_arch) +ENDPROC(blake2s_compress) diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c index f2cc1e5fc9ec1..0238a70d9581e 100644 --- a/arch/arm/crypto/blake2s-glue.c +++ b/arch/arm/crypto/blake2s-glue.c @@ -1,78 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2s digest algorithm, ARM scalar implementation - * - * Copyright 2020 Google LLC - */ #include -#include - #include /* defined in blake2s-core.S */ -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_arm(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_arm, \ - .final = crypto_blake2s_final_arm, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_arm_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_arm_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)) : 0; -} - -static void __exit blake2s_arm_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - crypto_unregister_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)); -} - -module_init(blake2s_arm_mod_init); -module_exit(blake2s_arm_mod_exit); - -MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-arm"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-arm"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-arm"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-arm"); +EXPORT_SYMBOL(blake2s_compress); diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c new file mode 100644 index 0000000000000..17c1c3bfe2f50 --- /dev/null +++ b/arch/arm/crypto/blake2s-shash.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BLAKE2s digest algorithm, ARM scalar implementation + * + * Copyright 2020 Google LLC + */ + +#include +#include + +#include + +static int crypto_blake2s_update_arm(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, blake2s_compress); +} + +static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, blake2s_compress); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_arm, \ + .final = crypto_blake2s_final_arm, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_arm_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_arm_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)) : 0; +} + +static void __exit blake2s_arm_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)); +} + +module_init(blake2s_arm_mod_init); +module_exit(blake2s_arm_mod_exit); + +MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-arm"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-arm"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-arm"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-arm"); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index f307c93fc90a7..c3af959648e62 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -62,7 +62,9 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o -blake2s-x86_64-y := blake2s-core.o blake2s-glue.o +blake2s-x86_64-y := blake2s-shash.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o +libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index a40365ab301ee..69853c13e8fb0 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -28,9 +27,8 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); -void blake2s_compress_arch(struct blake2s_state *state, - const u8 *block, size_t nblocks, - const u32 inc) +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) { /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); @@ -56,49 +54,12 @@ void blake2s_compress_arch(struct blake2s_state *state, block += blocks * BLAKE2S_BLOCK_SIZE; } while (nblocks); } -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_x86(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_x86, \ - .final = crypto_blake2s_final_x86, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), -}; +EXPORT_SYMBOL(blake2s_compress); static int __init blake2s_mod_init(void) { - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return 0; - - static_branch_enable(&blake2s_use_ssse3); + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && @@ -109,26 +70,9 @@ static int __init blake2s_mod_init(void) XFEATURE_MASK_AVX512, NULL)) static_branch_enable(&blake2s_use_avx512); - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_algs, - ARRAY_SIZE(blake2s_algs)) : 0; -} - -static void __exit blake2s_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; } module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-x86"); MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c new file mode 100644 index 0000000000000..f9e2fecdb761b --- /dev/null +++ b/arch/x86/crypto/blake2s-shash.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +static int crypto_blake2s_update_x86(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, blake2s_compress); +} + +static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, blake2s_compress); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_x86, \ + .final = crypto_blake2s_final_x86, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_mod_init(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; +} + +static void __exit blake2s_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); + +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 285f82647d2b7..55718de561375 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1919,9 +1919,10 @@ config CRYPTO_STATS config CRYPTO_HASH_INFO bool -source "lib/crypto/Kconfig" source "drivers/crypto/Kconfig" source "crypto/asymmetric_keys/Kconfig" source "certs/Kconfig" endif # if CRYPTO + +source "lib/crypto/Kconfig" diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index dd335ae1122b0..44a7d36446c52 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -81,7 +81,6 @@ config WIREGUARD select CRYPTO select CRYPTO_LIB_CURVE25519 select CRYPTO_LIB_CHACHA20POLY1305 - select CRYPTO_LIB_BLAKE2S select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT select CRYPTO_POLY1305_X86_64 if X86 && 64BIT select CRYPTO_BLAKE2S_X86 if X86 && 64BIT diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 8e50d487500f2..d39cfa0d333e9 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -11,11 +11,11 @@ #include #include -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc); -void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, - size_t nblocks, const u32 inc); +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc); bool blake2s_selftest(void); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 545ccbddf6a1d..8620f38e117c0 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -comment "Crypto library routines" - config CRYPTO_LIB_AES tristate @@ -9,14 +7,14 @@ config CRYPTO_LIB_ARC4 tristate config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - tristate + bool help Declares whether the architecture provides an arch-specific accelerated implementation of the Blake2s library interface, either builtin or as a module. config CRYPTO_LIB_BLAKE2S_GENERIC - tristate + def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S help This symbol can be depended upon by arch implementations of the Blake2s library interface that require the generic code as a @@ -24,15 +22,6 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. -config CRYPTO_LIB_BLAKE2S - tristate "BLAKE2s hash function library" - depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S - select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n - help - Enable the Blake2s library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_ARCH_HAVE_LIB_CHACHA tristate help @@ -51,7 +40,7 @@ config CRYPTO_LIB_CHACHA_GENERIC of CRYPTO_LIB_CHACHA. config CRYPTO_LIB_CHACHA - tristate "ChaCha library interface" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -76,7 +65,7 @@ config CRYPTO_LIB_CURVE25519_GENERIC of CRYPTO_LIB_CURVE25519. config CRYPTO_LIB_CURVE25519 - tristate "Curve25519 scalar multiplication library" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n help @@ -111,7 +100,7 @@ config CRYPTO_LIB_POLY1305_GENERIC of CRYPTO_LIB_POLY1305. config CRYPTO_LIB_POLY1305 - tristate "Poly1305 library interface" + tristate depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n help @@ -120,7 +109,7 @@ config CRYPTO_LIB_POLY1305 is available and enabled. config CRYPTO_LIB_CHACHA20POLY1305 - tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" + tristate depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_CHACHA diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 73205ed269bad..ed43a41f2dcc8 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -10,11 +10,10 @@ libaes-y := aes.o obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o -libblake2s-generic-y += blake2s-generic.o - -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o -libblake2s-y += blake2s.o +# blake2s is used by the /dev/random driver which is always builtin +obj-y += libblake2s.o +libblake2s-y := blake2s.o +libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 04ff8df245136..75ccb3e633e65 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -37,7 +37,11 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) + __weak __alias(blake2s_compress_generic); + +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc) { u32 m[16]; diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 4055aa593ec49..93f2ae0513702 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,12 +16,6 @@ #include #include -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) -# define blake2s_compress blake2s_compress_arch -#else -# define blake2s_compress blake2s_compress_generic -#endif - void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { __blake2s_update(state, in, inlen, blake2s_compress); -- GitLab From cba2195416d4c0719ba80fc2182ffa3cee71fb24 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Jan 2022 14:37:41 +0100 Subject: [PATCH 0142/4335] lib/crypto: blake2s: move hmac construction into wireguard commit d8d83d8ab0a453e17e68b3a3bed1f940c34b8646 upstream. Basically nobody should use blake2s in an HMAC construction; it already has a keyed variant. But unfortunately for historical reasons, Noise, used by WireGuard, uses HKDF quite strictly, which means we have to use this. Because this really shouldn't be used by others, this commit moves it into wireguard's noise.c locally, so that kernels that aren't using WireGuard don't get this superfluous code baked in. On m68k systems, this shaves off ~314 bytes. Cc: Herbert Xu Tested-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireguard/noise.c | 45 ++++++++++++++++++++++++++++++----- include/crypto/blake2s.h | 3 --- lib/crypto/blake2s-selftest.c | 31 ------------------------ lib/crypto/blake2s.c | 37 ---------------------------- 4 files changed, 39 insertions(+), 77 deletions(-) diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index c0cfd9b36c0b5..720952b92e784 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -302,6 +302,41 @@ void wg_noise_set_static_identity_private_key( static_identity->static_public, private_key); } +static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen) +{ + struct blake2s_state state; + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); + int i; + + if (keylen > BLAKE2S_BLOCK_SIZE) { + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, key, keylen); + blake2s_final(&state, x_key); + } else + memcpy(x_key, key, keylen); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x5c ^ 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); + blake2s_final(&state, i_hash); + + memcpy(out, i_hash, BLAKE2S_HASH_SIZE); + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); +} + /* This is Hugo Krawczyk's HKDF: * - https://eprint.iacr.org/2010/264.pdf * - https://tools.ietf.org/html/rfc5869 @@ -322,14 +357,14 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, ((third_len || third_dst) && (!second_len || !second_dst)))); /* Extract entropy from data into secret */ - blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); if (!first_dst || !first_len) goto out; /* Expand first key: key = secret, data = 0x1 */ output[0] = 1; - blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); memcpy(first_dst, output, first_len); if (!second_dst || !second_len) @@ -337,8 +372,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand second key: key = secret, data = first-key || 0x2 */ output[BLAKE2S_HASH_SIZE] = 2; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(second_dst, output, second_len); if (!third_dst || !third_len) @@ -346,8 +380,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand third key: key = secret, data = second-key || 0x3 */ output[BLAKE2S_HASH_SIZE] = 3; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(third_dst, output, third_len); out: diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index bc3fb59442ce5..4e30e1799e614 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, blake2s_final(&state, out); } -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen); - #endif /* _CRYPTO_BLAKE2S_H */ diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 5d9ea53be9736..409e4b7287704 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -15,7 +15,6 @@ * #include * * #include - * #include * * #define BLAKE2S_TESTVEC_COUNT 256 * @@ -58,16 +57,6 @@ * } * printf("};\n\n"); * - * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); - * - * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * printf("};\n"); - * * return 0; *} */ @@ -554,15 +543,6 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { - { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, - 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, - 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, - { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, - 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, - 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, -}; - bool __init blake2s_selftest(void) { u8 key[BLAKE2S_KEY_SIZE]; @@ -607,16 +587,5 @@ bool __init blake2s_selftest(void) } } - if (success) { - blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); - success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); - - blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); - success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); - - if (!success) - pr_err("blake2s256_hmac self-test: FAIL\n"); - } - return success; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 93f2ae0513702..9364f79937b81 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -30,43 +30,6 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen) -{ - struct blake2s_state state; - u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; - u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); - int i; - - if (keylen > BLAKE2S_BLOCK_SIZE) { - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, key, keylen); - blake2s_final(&state, x_key); - } else - memcpy(x_key, key, keylen); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, in, inlen); - blake2s_final(&state, i_hash); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x5c ^ 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); - blake2s_final(&state, i_hash); - - memcpy(out, i_hash, BLAKE2S_HASH_SIZE); - memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); - memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); -} -EXPORT_SYMBOL(blake2s256_hmac); - static int __init blake2s_mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && -- GitLab From b0cdd9ec844515d65000f0fc133a4badf661c519 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Jan 2022 18:58:43 +0100 Subject: [PATCH 0143/4335] lib/crypto: sha1: re-roll loops to reduce code size commit 9a1536b093bb5bf60689021275fd24d513bb8db0 upstream. With SHA-1 no longer being used for anything performance oriented, and also soon to be phased out entirely, we can make up for the space added by unrolled BLAKE2s by simply re-rolling SHA-1. Since SHA-1 is so much more complex, re-rolling it more or less takes care of the code size added by BLAKE2s. And eventually, hopefully we'll see SHA-1 removed entirely from most small kernel builds. Cc: Herbert Xu Cc: Ard Biesheuvel Tested-by: Geert Uytterhoeven Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- lib/sha1.c | 95 ++++++++---------------------------------------------- 1 file changed, 14 insertions(+), 81 deletions(-) diff --git a/lib/sha1.c b/lib/sha1.c index 9bd1935a14727..0494766fc574e 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,8 @@ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ __u32 TEMP = input(t); setW(t, TEMP); \ E += TEMP + rol32(A,5) + (fn) + (constant); \ - B = ror32(B, 2); } while (0) + B = ror32(B, 2); \ + TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0) #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) @@ -84,6 +86,7 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) { __u32 A, B, C, D, E; + unsigned int i = 0; A = digest[0]; B = digest[1]; @@ -92,94 +95,24 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) E = digest[4]; /* Round 1 - iterations 0-16 take their input from 'data' */ - T_0_15( 0, A, B, C, D, E); - T_0_15( 1, E, A, B, C, D); - T_0_15( 2, D, E, A, B, C); - T_0_15( 3, C, D, E, A, B); - T_0_15( 4, B, C, D, E, A); - T_0_15( 5, A, B, C, D, E); - T_0_15( 6, E, A, B, C, D); - T_0_15( 7, D, E, A, B, C); - T_0_15( 8, C, D, E, A, B); - T_0_15( 9, B, C, D, E, A); - T_0_15(10, A, B, C, D, E); - T_0_15(11, E, A, B, C, D); - T_0_15(12, D, E, A, B, C); - T_0_15(13, C, D, E, A, B); - T_0_15(14, B, C, D, E, A); - T_0_15(15, A, B, C, D, E); + for (; i < 16; ++i) + T_0_15(i, A, B, C, D, E); /* Round 1 - tail. Input from 512-bit mixing array */ - T_16_19(16, E, A, B, C, D); - T_16_19(17, D, E, A, B, C); - T_16_19(18, C, D, E, A, B); - T_16_19(19, B, C, D, E, A); + for (; i < 20; ++i) + T_16_19(i, A, B, C, D, E); /* Round 2 */ - T_20_39(20, A, B, C, D, E); - T_20_39(21, E, A, B, C, D); - T_20_39(22, D, E, A, B, C); - T_20_39(23, C, D, E, A, B); - T_20_39(24, B, C, D, E, A); - T_20_39(25, A, B, C, D, E); - T_20_39(26, E, A, B, C, D); - T_20_39(27, D, E, A, B, C); - T_20_39(28, C, D, E, A, B); - T_20_39(29, B, C, D, E, A); - T_20_39(30, A, B, C, D, E); - T_20_39(31, E, A, B, C, D); - T_20_39(32, D, E, A, B, C); - T_20_39(33, C, D, E, A, B); - T_20_39(34, B, C, D, E, A); - T_20_39(35, A, B, C, D, E); - T_20_39(36, E, A, B, C, D); - T_20_39(37, D, E, A, B, C); - T_20_39(38, C, D, E, A, B); - T_20_39(39, B, C, D, E, A); + for (; i < 40; ++i) + T_20_39(i, A, B, C, D, E); /* Round 3 */ - T_40_59(40, A, B, C, D, E); - T_40_59(41, E, A, B, C, D); - T_40_59(42, D, E, A, B, C); - T_40_59(43, C, D, E, A, B); - T_40_59(44, B, C, D, E, A); - T_40_59(45, A, B, C, D, E); - T_40_59(46, E, A, B, C, D); - T_40_59(47, D, E, A, B, C); - T_40_59(48, C, D, E, A, B); - T_40_59(49, B, C, D, E, A); - T_40_59(50, A, B, C, D, E); - T_40_59(51, E, A, B, C, D); - T_40_59(52, D, E, A, B, C); - T_40_59(53, C, D, E, A, B); - T_40_59(54, B, C, D, E, A); - T_40_59(55, A, B, C, D, E); - T_40_59(56, E, A, B, C, D); - T_40_59(57, D, E, A, B, C); - T_40_59(58, C, D, E, A, B); - T_40_59(59, B, C, D, E, A); + for (; i < 60; ++i) + T_40_59(i, A, B, C, D, E); /* Round 4 */ - T_60_79(60, A, B, C, D, E); - T_60_79(61, E, A, B, C, D); - T_60_79(62, D, E, A, B, C); - T_60_79(63, C, D, E, A, B); - T_60_79(64, B, C, D, E, A); - T_60_79(65, A, B, C, D, E); - T_60_79(66, E, A, B, C, D); - T_60_79(67, D, E, A, B, C); - T_60_79(68, C, D, E, A, B); - T_60_79(69, B, C, D, E, A); - T_60_79(70, A, B, C, D, E); - T_60_79(71, E, A, B, C, D); - T_60_79(72, D, E, A, B, C); - T_60_79(73, C, D, E, A, B); - T_60_79(74, B, C, D, E, A); - T_60_79(75, A, B, C, D, E); - T_60_79(76, E, A, B, C, D); - T_60_79(77, D, E, A, B, C); - T_60_79(78, C, D, E, A, B); - T_60_79(79, B, C, D, E, A); + for (; i < 80; ++i) + T_60_79(i, A, B, C, D, E); digest[0] += A; digest[1] += B; -- GitLab From 45626449eb251304b18d7722a48486c7957f3012 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 19 Jan 2022 14:35:06 +0100 Subject: [PATCH 0144/4335] lib/crypto: blake2s: avoid indirect calls to compression function for Clang CFI commit d2a02e3c8bb6b347818518edff5a4b40ff52d6d8 upstream. blake2s_compress_generic is weakly aliased by blake2s_compress. The current harness for function selection uses a function pointer, which is ordinarily inlined and resolved at compile time. But when Clang's CFI is enabled, CFI still triggers when making an indirect call via a weak symbol. This seems like a bug in Clang's CFI, as though it's bucketing weak symbols and strong symbols differently. It also only seems to trigger when "full LTO" mode is used, rather than "thin LTO". [ 0.000000][ T0] Kernel panic - not syncing: CFI failure (target: blake2s_compress_generic+0x0/0x1444) [ 0.000000][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-mainline-06981-g076c855b846e #1 [ 0.000000][ T0] Hardware name: MT6873 (DT) [ 0.000000][ T0] Call trace: [ 0.000000][ T0] dump_backtrace+0xfc/0x1dc [ 0.000000][ T0] dump_stack_lvl+0xa8/0x11c [ 0.000000][ T0] panic+0x194/0x464 [ 0.000000][ T0] __cfi_check_fail+0x54/0x58 [ 0.000000][ T0] __cfi_slowpath_diag+0x354/0x4b0 [ 0.000000][ T0] blake2s_update+0x14c/0x178 [ 0.000000][ T0] _extract_entropy+0xf4/0x29c [ 0.000000][ T0] crng_initialize_primary+0x24/0x94 [ 0.000000][ T0] rand_initialize+0x2c/0x6c [ 0.000000][ T0] start_kernel+0x2f8/0x65c [ 0.000000][ T0] __primary_switched+0xc4/0x7be4 [ 0.000000][ T0] Rebooting in 5 seconds.. Nonetheless, the function pointer method isn't so terrific anyway, so this patch replaces it with a simple boolean, which also gets inlined away. This successfully works around the Clang bug. In general, I'm not too keen on all of the indirection involved here; it clearly does more harm than good. Hopefully the whole thing can get cleaned up down the road when lib/crypto is overhauled more comprehensively. But for now, we go with a simple bandaid. Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in") Link: https://github.com/ClangBuiltLinux/linux/issues/1567 Reported-by: Miles Chen Tested-by: Miles Chen Tested-by: Nathan Chancellor Tested-by: John Stultz Acked-by: Nick Desaulniers Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/blake2s-shash.c | 4 ++-- arch/x86/crypto/blake2s-shash.c | 4 ++-- crypto/blake2s_generic.c | 4 ++-- include/crypto/internal/blake2s.h | 40 +++++++++++++++++++------------ lib/crypto/blake2s.c | 4 ++-- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c index 17c1c3bfe2f50..763c73beea2d0 100644 --- a/arch/arm/crypto/blake2s-shash.c +++ b/arch/arm/crypto/blake2s-shash.c @@ -13,12 +13,12 @@ static int crypto_blake2s_update_arm(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c index f9e2fecdb761b..59ae28abe35cc 100644 --- a/arch/x86/crypto/blake2s-shash.c +++ b/arch/x86/crypto/blake2s-shash.c @@ -18,12 +18,12 @@ static int crypto_blake2s_update_x86(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c index 72fe480f9bd67..5f96a21f87883 100644 --- a/crypto/blake2s_generic.c +++ b/crypto/blake2s_generic.c @@ -15,12 +15,12 @@ static int crypto_blake2s_update_generic(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic); + return crypto_blake2s_update(desc, in, inlen, true); } static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress_generic); + return crypto_blake2s_final(desc, out, true); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index d39cfa0d333e9..52363eee2b20e 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } -typedef void (*blake2s_compress_t)(struct blake2s_state *state, - const u8 *block, size_t nblocks, u32 inc); - /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static inline void __blake2s_update(struct blake2s_state *state, - const u8 *in, size_t inlen, - blake2s_compress_t compress) +static __always_inline void +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, + bool force_generic) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); } @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, state->buflen += inlen; } -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, - blake2s_compress_t compress) +static __always_inline void +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, state->buflen); + else + blake2s_compress(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, compress); + __blake2s_update(state, in, inlen, force_generic); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, compress); + __blake2s_final(state, out, force_generic); return 0; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 9364f79937b81..c71c09621c09c 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -18,14 +18,14 @@ void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - __blake2s_update(state, in, inlen, blake2s_compress); + __blake2s_update(state, in, inlen, false); } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - __blake2s_final(state, out, blake2s_compress); + __blake2s_final(state, out, false); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); -- GitLab From 40af1df8034acd89621dd017c086ff295597d778 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Dec 2021 17:44:49 +0000 Subject: [PATCH 0145/4335] random: document add_hwgenerator_randomness() with other input functions commit 2b6c6e3d9ce3aa0e547ac25d60e06fe035cd9f79 upstream. The section at the top of random.c which documents the input functions available does not document add_hwgenerator_randomness() which might lead a reader to overlook it. Add a brief note about it. Signed-off-by: Mark Brown [Jason: reorganize position of function in doc comment and also document add_bootloader_randomness() while we're at it.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index ebe86de9d0acc..9625acdabb429 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -202,6 +202,9 @@ * unsigned int value); * void add_interrupt_randomness(int irq, int irq_flags); * void add_disk_randomness(struct gendisk *disk); + * void add_hwgenerator_randomness(const char *buffer, size_t count, + * size_t entropy); + * void add_bootloader_randomness(const void *buf, unsigned int size); * * add_device_randomness() is for adding data to the random pool that * is likely to differ between two devices (or possibly even per boot). @@ -228,6 +231,14 @@ * particular randomness source. They do this by keeping track of the * first and second order deltas of the event timings. * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or + * add_device_randomness(), depending on whether or not the configuration + * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * * Ensuring unpredictability at system startup * ============================================ * -- GitLab From c5a7694fa80269e02dfe215674616254077ff140 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Dec 2021 13:17:33 +0100 Subject: [PATCH 0146/4335] random: remove unused irq_flags argument from add_interrupt_randomness() commit 703f7066f40599c290babdb79dd61319264987e9 upstream. Since commit ee3e00e9e7101 ("random: use registers from interrupted code for CPU's w/o a cycle counter") the irq_flags argument is no longer used. Remove unused irq_flags. Cc: Borislav Petkov Cc: Dave Hansen Cc: Dexuan Cui Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: Ingo Molnar Cc: K. Y. Srinivasan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Wei Liu Cc: linux-hyperv@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Wei Liu Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 2 +- drivers/char/random.c | 4 ++-- drivers/hv/vmbus_drv.c | 2 +- include/linux/random.h | 2 +- kernel/irq/handle.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index ef6316fef99ff..ba0efc30fac52 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -79,7 +79,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); - add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR); ack_APIC_irq(); set_irq_regs(old_regs); diff --git a/drivers/char/random.c b/drivers/char/random.c index 9625acdabb429..4d6c5a5b9a47a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -200,7 +200,7 @@ * void add_device_randomness(const void *buf, unsigned int size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); - * void add_interrupt_randomness(int irq, int irq_flags); + * void add_interrupt_randomness(int irq); * void add_disk_randomness(struct gendisk *disk); * void add_hwgenerator_randomness(const char *buffer, size_t count, * size_t entropy); @@ -1273,7 +1273,7 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *ptr; } -void add_interrupt_randomness(int irq, int irq_flags) +void add_interrupt_randomness(int irq) { struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index aea8125a4db8e..50d9113f54025 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1381,7 +1381,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } - add_interrupt_randomness(vmbus_interrupt, 0); + add_interrupt_randomness(vmbus_interrupt); } static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4e..c45b2693e51fb 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +extern void add_interrupt_randomness(int irq) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 221d80c31e94c..fca637d4da1a7 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -195,7 +195,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) retval = __handle_irq_event_percpu(desc, &flags); - add_interrupt_randomness(desc->irq_data.irq, flags); + add_interrupt_randomness(desc->irq_data.irq); if (!irq_settings_no_debug(desc)) note_interrupt(desc, retval); -- GitLab From 2e827d53343ad7e54c0f1371c9d9b8ce62b3ffd7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 21 Dec 2021 16:31:27 +0100 Subject: [PATCH 0147/4335] random: use BLAKE2s instead of SHA1 in extraction commit 9f9eff85a008b095eafc5f4ecbaf5aca689271c1 upstream. This commit addresses one of the lower hanging fruits of the RNG: its usage of SHA1. BLAKE2s is generally faster, and certainly more secure, than SHA1, which has [1] been [2] really [3] very [4] broken [5]. Additionally, the current construction in the RNG doesn't use the full SHA1 function, as specified, and allows overwriting the IV with RDRAND output in an undocumented way, even in the case when RDRAND isn't set to "trusted", which means potential malicious IV choices. And its short length means that keeping only half of it secret when feeding back into the mixer gives us only 2^80 bits of forward secrecy. In other words, not only is the choice of hash function dated, but the use of it isn't really great either. This commit aims to fix both of these issues while also keeping the general structure and semantics as close to the original as possible. Specifically: a) Rather than overwriting the hash IV with RDRAND, we put it into BLAKE2's documented "salt" and "personal" fields, which were specifically created for this type of usage. b) Since this function feeds the full hash result back into the entropy collector, we only return from it half the length of the hash, just as it was done before. This increases the construction's forward secrecy from 2^80 to a much more comfortable 2^128. c) Rather than using the raw "sha1_transform" function alone, we instead use the full proper BLAKE2s function, with finalization. This also has the advantage of supplying 16 bytes at a time rather than SHA1's 10 bytes, which, in addition to having a faster compression function to begin with, means faster extraction in general. On an Intel i7-11850H, this commit makes initial seeding around 131% faster. BLAKE2s itself has the nice property of internally being based on the ChaCha permutation, which the RNG is already using for expansion, so there shouldn't be any issue with newness, funkiness, or surprising CPU behavior, since it's based on something already in use. [1] https://eprint.iacr.org/2005/010.pdf [2] https://www.iacr.org/archive/crypto2005/36210017/36210017.pdf [3] https://eprint.iacr.org/2015/967.pdf [4] https://shattered.io/static/shattered.pdf [5] https://www.usenix.org/system/files/sec20-leurent.pdf Reviewed-by: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 71 ++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4d6c5a5b9a47a..ca773f100fd60 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,8 +1,7 @@ /* * random.c -- A strong random number generator * - * Copyright (C) 2017 Jason A. Donenfeld . All - * Rights Reserved. + * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * * Copyright Matt Mackall , 2003, 2004, 2005 * @@ -78,12 +77,12 @@ * an *estimate* of how many bits of randomness have been stored into * the random number generator's internal state. * - * When random bytes are desired, they are obtained by taking the SHA - * hash of the contents of the "entropy pool". The SHA hash avoids + * When random bytes are desired, they are obtained by taking the BLAKE2s + * hash of the contents of the "entropy pool". The BLAKE2s hash avoids * exposing the internal state of the entropy pool. It is believed to * be computationally infeasible to derive any useful information - * about the input of SHA from its output. Even if it is possible to - * analyze SHA in some clever way, as long as the amount of data + * about the input of BLAKE2s from its output. Even if it is possible to + * analyze BLAKE2s in some clever way, as long as the amount of data * returned from the generator is less than the inherent entropy in * the pool, the output data is totally unpredictable. For this * reason, the routine decreases its internal estimate of how many @@ -93,7 +92,7 @@ * If this estimate goes to zero, the routine can still generate * random numbers; however, an attacker may (at least in theory) be * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of SHA, which is + * outputs. This requires successful cryptanalysis of BLAKE2s, which is * not believed to be feasible, but there is a remote possibility. * Nonetheless, these numbers should be useful for the vast majority * of purposes. @@ -347,7 +346,7 @@ #include #include #include -#include +#include #include #include @@ -367,10 +366,7 @@ #define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) #define OUTPUT_POOL_SHIFT 10 #define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) -#define EXTRACT_SIZE 10 - - -#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) +#define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) /* * To allow fractional bits to be tracked, the entropy_count field is @@ -406,7 +402,7 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; * Thanks to Colin Plumb for suggesting this. * * The mixing operation is much less sensitive than the output hash, - * where we use SHA-1. All that we want of mixing operation is that + * where we use BLAKE2s. All that we want of mixing operation is that * it be a good non-cryptographic hash; i.e. it not produce collisions * when fed "random" data of the sort we expect to see. As long as * the pool state differs for different inputs, we have preserved the @@ -1399,56 +1395,49 @@ retry: */ static void extract_buf(struct entropy_store *r, __u8 *out) { - int i; - union { - __u32 w[5]; - unsigned long l[LONGS(20)]; - } hash; - __u32 workspace[SHA1_WORKSPACE_WORDS]; + struct blake2s_state state __aligned(__alignof__(unsigned long)); + u8 hash[BLAKE2S_HASH_SIZE]; + unsigned long *salt; unsigned long flags; + blake2s_init(&state, sizeof(hash)); + /* * If we have an architectural hardware random number - * generator, use it for SHA's initial vector + * generator, use it for BLAKE2's salt & personal fields. */ - sha1_init(hash.w); - for (i = 0; i < LONGS(20); i++) { + for (salt = (unsigned long *)&state.h[4]; + salt < (unsigned long *)&state.h[8]; ++salt) { unsigned long v; if (!arch_get_random_long(&v)) break; - hash.l[i] = v; + *salt ^= v; } - /* Generate a hash across the pool, 16 words (512 bits) at a time */ + /* Generate a hash across the pool */ spin_lock_irqsave(&r->lock, flags); - for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha1_transform(hash.w, (__u8 *)(r->pool + i), workspace); + blake2s_update(&state, (const u8 *)r->pool, + r->poolinfo->poolwords * sizeof(*r->pool)); + blake2s_final(&state, hash); /* final zeros out state */ /* * We mix the hash back into the pool to prevent backtracking * attacks (where the attacker knows the state of the pool * plus the current outputs, and attempts to find previous - * ouputs), unless the hash function can be inverted. By - * mixing at least a SHA1 worth of hash data back, we make + * outputs), unless the hash function can be inverted. By + * mixing at least a hash worth of hash data back, we make * brute-forcing the feedback as hard as brute-forcing the * hash. */ - __mix_pool_bytes(r, hash.w, sizeof(hash.w)); + __mix_pool_bytes(r, hash, sizeof(hash)); spin_unlock_irqrestore(&r->lock, flags); - memzero_explicit(workspace, sizeof(workspace)); - - /* - * In case the hash function has some recognizable output - * pattern, we fold it in half. Thus, we always feed back - * twice as much data as we output. + /* Note that EXTRACT_SIZE is half of hash size here, because above + * we've dumped the full length back into mixer. By reducing the + * amount that we emit, we retain a level of forward secrecy. */ - hash.w[0] ^= hash.w[3]; - hash.w[1] ^= hash.w[4]; - hash.w[2] ^= rol32(hash.w[2], 16); - - memcpy(out, &hash, EXTRACT_SIZE); - memzero_explicit(&hash, sizeof(hash)); + memcpy(out, hash, EXTRACT_SIZE); + memzero_explicit(hash, sizeof(hash)); } static ssize_t _extract_entropy(struct entropy_store *r, void *buf, -- GitLab From 08040365d0b7f69645d2f268ef8e428d3ab99187 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 24 Dec 2021 19:17:58 +0100 Subject: [PATCH 0148/4335] random: do not sign extend bytes for rotation when mixing commit 0d9488ffbf2faddebc6bac055bfa6c93b94056a3 upstream. By using `char` instead of `unsigned char`, certain platforms will sign extend the byte when `w = rol32(*bytes++, input_rotate)` is called, meaning that bit 7 is overrepresented when mixing. This isn't a real problem (unless the mixer itself is already broken) since it's still invertible, but it's not quite correct either. Fix this by using an explicit unsigned type. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ca773f100fd60..8eac76539fddc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -547,7 +547,7 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, unsigned long i, tap1, tap2, tap3, tap4, tap5; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; - const char *bytes = in; + const unsigned char *bytes = in; __u32 w; tap1 = r->poolinfo->tap1; -- GitLab From 1b1258b91757fa294f1f6a6e7302fe3b368e4221 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Dec 2021 22:10:04 +0100 Subject: [PATCH 0149/4335] random: do not re-init if crng_reseed completes before primary init commit 9c3ddde3f811aabbb83778a2a615bf141b4909ef upstream. If the bootloader supplies sufficient material and crng_reseed() is called very early on, but not too early that wqs aren't available yet, then we might transition to crng_init==2 before rand_initialize()'s call to crng_initialize_primary() made. Then, when crng_initialize_primary() is called, if we're trusting the CPU's RDRAND instructions, we'll needlessly reinitialize the RNG and emit a message about it. This is mostly harmless, as numa_crng_init() will allocate and then free what it just allocated, and excessive calls to invalidate_batched_entropy() aren't so harmful. But it is funky and the extra message is confusing, so avoid the re-initialization all together by checking for crng_init < 2 in crng_initialize_primary(), just as we already do in crng_reseed(). Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8eac76539fddc..de9ca41ea870e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -827,7 +827,7 @@ static void __init crng_initialize_primary(struct crng_state *crng) { chacha_init_consts(crng->state); _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); - if (crng_init_try_arch_early(crng) && trust_cpu) { + if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; -- GitLab From c4c9081184e9afc59e0a31fb81fd027cbfde4163 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Dec 2021 22:10:06 +0100 Subject: [PATCH 0150/4335] random: mix bootloader randomness into pool commit 57826feeedb63b091f807ba8325d736775d39afd upstream. If we're trusting bootloader randomness, crng_fast_load() is called by add_hwgenerator_randomness(), which sets us to crng_init==1. However, usually it is only called once for an initial 64-byte push, so bootloader entropy will not mix any bytes into the input pool. So it's conceivable that crng_init==1 when crng_initialize_primary() is called later, but then the input pool is empty. When that happens, the crng state key will be overwritten with extracted output from the empty input pool. That's bad. In contrast, if we're not trusting bootloader randomness, we call crng_slow_load() *and* we call mix_pool_bytes(), so that later crng_initialize_primary() isn't drawing on nothing. In order to prevent crng_initialize_primary() from extracting an empty pool, have the trusted bootloader case mirror that of the untrusted bootloader case, mixing the input into the pool. [linux@dominikbrodowski.net: rewrite commit message] Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index de9ca41ea870e..60dc2cd87d2b1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2301,6 +2301,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, if (unlikely(crng_init == 0)) { size_t ret = crng_fast_load(buffer, count); + mix_pool_bytes(poolp, buffer, ret); count -= ret; buffer += ret; if (!count || crng_init == 0) -- GitLab From 69bb5f0917f973dbaad21d02e4763530627c1dc2 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 29 Dec 2021 22:10:07 +0100 Subject: [PATCH 0151/4335] random: harmonize "crng init done" messages commit 161212c7fd1d9069b232785c75492e50941e2ea8 upstream. We print out "crng init done" for !TRUST_CPU, so we should also print out the same for TRUST_CPU. Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 60dc2cd87d2b1..26939a3000599 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -831,7 +831,7 @@ static void __init crng_initialize_primary(struct crng_state *crng) invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; - pr_notice("crng done (trusting CPU's manufacturer)\n"); + pr_notice("crng init done (trusting CPU's manufacturer)\n"); } crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -- GitLab From 5822fad8d3dc88fb8fc3774ae911afc962290882 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 30 Dec 2021 15:59:26 +0100 Subject: [PATCH 0152/4335] random: use IS_ENABLED(CONFIG_NUMA) instead of ifdefs commit 7b87324112df2e1f9b395217361626362dcfb9fb upstream. Rather than an awkward combination of ifdefs and __maybe_unused, we can ensure more source gets parsed, regardless of the configuration, by using IS_ENABLED for the CONFIG_NUMA conditional code. This makes things cleaner and easier to follow. I've confirmed that on !CONFIG_NUMA, we don't wind up with excess code by accident; the generated object file is the same. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 26939a3000599..55b73e454a122 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -759,7 +759,6 @@ static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -#ifdef CONFIG_NUMA /* * Hack to deal with crazy userspace progams when they are all trying * to access /dev/urandom in parallel. The programs are almost @@ -767,7 +766,6 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); * their brain damage. */ static struct crng_state **crng_node_pool __read_mostly; -#endif static void invalidate_batched_entropy(void); static void numa_crng_init(void); @@ -815,7 +813,7 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) return arch_init; } -static void __maybe_unused crng_initialize_secondary(struct crng_state *crng) +static void crng_initialize_secondary(struct crng_state *crng) { chacha_init_consts(crng->state); _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); @@ -866,7 +864,6 @@ static void crng_finalize_init(struct crng_state *crng) } } -#ifdef CONFIG_NUMA static void do_numa_crng_init(struct work_struct *work) { int i; @@ -893,29 +890,24 @@ static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); static void numa_crng_init(void) { - schedule_work(&numa_crng_init_work); + if (IS_ENABLED(CONFIG_NUMA)) + schedule_work(&numa_crng_init_work); } static struct crng_state *select_crng(void) { - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; + if (IS_ENABLED(CONFIG_NUMA)) { + struct crng_state **pool; + int nid = numa_node_id(); - return &primary_crng; -} -#else -static void numa_crng_init(void) {} + /* pairs with cmpxchg_release() in do_numa_crng_init() */ + pool = READ_ONCE(crng_node_pool); + if (pool && pool[nid]) + return pool[nid]; + } -static struct crng_state *select_crng(void) -{ return &primary_crng; } -#endif /* * crng_fast_load() can be called by code in the interrupt service -- GitLab From 0c3910447110345a0ae27a6ea6685fdb0bc03028 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 31 Dec 2021 09:26:08 +0100 Subject: [PATCH 0153/4335] random: early initialization of ChaCha constants commit 96562f286884e2db89c74215b199a1084b5fb7f7 upstream. Previously, the ChaCha constants for the primary pool were only initialized in crng_initialize_primary(), called by rand_initialize(). However, some randomness is actually extracted from the primary pool beforehand, e.g. by kmem_cache_create(). Therefore, statically initialize the ChaCha constants for the primary pool. Cc: Herbert Xu Cc: "David S. Miller" Cc: Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 ++++- include/crypto/chacha.h | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 55b73e454a122..2ecfcf924890e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -457,6 +457,10 @@ struct crng_state { static struct crng_state primary_crng = { .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), + .state[0] = CHACHA_CONSTANT_EXPA, + .state[1] = CHACHA_CONSTANT_ND_3, + .state[2] = CHACHA_CONSTANT_2_BY, + .state[3] = CHACHA_CONSTANT_TE_K, }; /* @@ -823,7 +827,6 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - chacha_init_consts(crng->state); _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index dabaee6987186..b3ea73b819443 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) hchacha_block_generic(state, out, nrounds); } +enum chacha_constants { /* expand 32-byte k */ + CHACHA_CONSTANT_EXPA = 0x61707865U, + CHACHA_CONSTANT_ND_3 = 0x3320646eU, + CHACHA_CONSTANT_2_BY = 0x79622d32U, + CHACHA_CONSTANT_TE_K = 0x6b206574U +}; + static inline void chacha_init_consts(u32 *state) { - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ + state[0] = CHACHA_CONSTANT_EXPA; + state[1] = CHACHA_CONSTANT_ND_3; + state[2] = CHACHA_CONSTANT_2_BY; + state[3] = CHACHA_CONSTANT_TE_K; } void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); -- GitLab From d47579fb283ada92ca83eea1ed22dda19883b483 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 30 Dec 2021 17:50:52 +0100 Subject: [PATCH 0154/4335] random: avoid superfluous call to RDRAND in CRNG extraction commit 2ee25b6968b1b3c66ffa408de23d023c1bce81cf upstream. RDRAND is not fast. RDRAND is actually quite slow. We've known this for a while, which is why functions like get_random_u{32,64} were converted to use batching of our ChaCha-based CRNG instead. Yet CRNG extraction still includes a call to RDRAND, in the hot path of every call to get_random_bytes(), /dev/urandom, and getrandom(2). This call to RDRAND here seems quite superfluous. CRNG is already extracting things based on a 256-bit key, based on good entropy, which is then reseeded periodically, updated, backtrack-mutated, and so forth. The CRNG extraction construction is something that we're already relying on to be secure and solid. If it's not, that's a serious problem, and it's unlikely that mixing in a measly 32 bits from RDRAND is going to alleviate things. And in the case where the CRNG doesn't have enough entropy yet, we're already initializing the ChaCha key row with RDRAND in crng_init_try_arch_early(). Removing the call to RDRAND improves performance on an i7-11850H by 370%. In other words, the vast majority of the work done by extract_crng() prior to this commit was devoted to fetching 32 bits of RDRAND. Reviewed-by: Theodore Ts'o Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 2ecfcf924890e..8f5e040b9e3ba 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1023,7 +1023,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]) { - unsigned long v, flags, init_time; + unsigned long flags, init_time; if (crng_ready()) { init_time = READ_ONCE(crng->init_time); @@ -1033,8 +1033,6 @@ static void _extract_crng(struct crng_state *crng, &input_pool : NULL); } spin_lock_irqsave(&crng->lock, flags); - if (arch_get_random_long(&v)) - crng->state[14] ^= v; chacha20_block(&crng->state[0], out); if (crng->state[12] == 0) crng->state[13]++; -- GitLab From 15a1a3baf14f13f3a436fb90c5f6e0766c7fd6c5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 3 Jan 2022 16:59:31 +0100 Subject: [PATCH 0155/4335] random: don't reset crng_init_cnt on urandom_read() commit 6c8e11e08a5b74bb8a5cdd5cbc1e5143df0fba72 upstream. At the moment, urandom_read() (used for /dev/urandom) resets crng_init_cnt to zero when it is called at crng_init<2. This is inconsistent: We do it for /dev/urandom reads, but not for the equivalent getrandom(GRND_INSECURE). (And worse, as Jason pointed out, we're only doing this as long as maxwarn>0.) crng_init_cnt is only read in crng_fast_load(); it is relevant at crng_init==0 for determining when to switch to crng_init==1 (and where in the RNG state array to write). As far as I understand: - crng_init==0 means "we have nothing, we might just be returning the same exact numbers on every boot on every machine, we don't even have non-cryptographic randomness; we should shove every bit of entropy we can get into the RNG immediately" - crng_init==1 means "well we have something, it might not be cryptographic, but at least we're not gonna return the same data every time or whatever, it's probably good enough for TCP and ASLR and stuff; we now have time to build up actual cryptographic entropy in the input pool" - crng_init==2 means "this is supposed to be cryptographically secure now, but we'll keep adding more entropy just to be sure". The current code means that if someone is pulling data from /dev/urandom fast enough at crng_init==0, we'll keep resetting crng_init_cnt, and we'll never make forward progress to crng_init==1. It seems to be intended to prevent an attacker from bruteforcing the contents of small individual RNG inputs on the way from crng_init==0 to crng_init==1, but that's misguided; crng_init==1 isn't supposed to provide proper cryptographic security anyway, RNG users who care about getting secure RNG output have to wait until crng_init==2. This code was inconsistent, and it probably made things worse - just get rid of it. Signed-off-by: Jann Horn Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8f5e040b9e3ba..08072d46ac8bd 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1831,7 +1831,6 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - unsigned long flags; static int maxwarn = 10; if (!crng_ready() && maxwarn > 0) { @@ -1839,9 +1838,6 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (__ratelimit(&urandom_warning)) pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", current->comm, nbytes); - spin_lock_irqsave(&primary_crng.lock, flags); - crng_init_cnt = 0; - spin_unlock_irqrestore(&primary_crng.lock, flags); } return urandom_read_nowarn(file, buf, nbytes, ppos); -- GitLab From 250bda5d0505927ca8648254752ee298cbaf9623 Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Fri, 14 Jan 2022 16:12:16 +0800 Subject: [PATCH 0156/4335] random: fix typo in comments commit c0a8a61e7abbf66729687ee63659ee25983fbb1e upstream. s/or/for Signed-off-by: Schspa Shi Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 08072d46ac8bd..c6d798b9f0f06 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -101,7 +101,7 @@ * =============================== * * There are four exported interfaces; two for use within the kernel, - * and two or use from userspace. + * and two for use from userspace. * * Exported interfaces ---- userspace output * ----------------------------------------- -- GitLab From 4330c485f92cde74756ce9b9a91ebb160385aa1e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 9 Jan 2022 17:32:02 +0100 Subject: [PATCH 0157/4335] random: cleanup poolinfo abstraction commit 91ec0fe138f107232cb36bc6112211db37cb5306 upstream. Now that we're only using one polynomial, we can cleanup its representation into constants, instead of passing around pointers dynamically to select different polynomials. This improves the codegen and makes the code a bit more straightforward. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 67 +++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c6d798b9f0f06..55d70a3981d38 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -430,14 +430,20 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; * polynomial which improves the resulting TGFSR polynomial to be * irreducible, which we have made here. */ -static const struct poolinfo { - int poolbitshift, poolwords, poolbytes, poolfracbits; -#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) - int tap1, tap2, tap3, tap4, tap5; -} poolinfo_table[] = { - /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ +enum poolinfo { + POOL_WORDS = 128, + POOL_WORDMASK = POOL_WORDS - 1, + POOL_BYTES = POOL_WORDS * sizeof(u32), + POOL_BITS = POOL_BYTES * 8, + POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, + POOL_FRACBITS = POOL_WORDS << (ENTROPY_SHIFT + 5), + /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - { S(128), 104, 76, 51, 25, 1 }, + POOL_TAP1 = 104, + POOL_TAP2 = 76, + POOL_TAP3 = 51, + POOL_TAP4 = 25, + POOL_TAP5 = 1 }; /* @@ -503,7 +509,6 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); struct entropy_store; struct entropy_store { /* read-only data: */ - const struct poolinfo *poolinfo; __u32 *pool; const char *name; @@ -525,7 +530,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { - .poolinfo = &poolinfo_table[0], .name = "input", .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data @@ -548,33 +552,26 @@ static __u32 const twist_table[8] = { static void _mix_pool_bytes(struct entropy_store *r, const void *in, int nbytes) { - unsigned long i, tap1, tap2, tap3, tap4, tap5; + unsigned long i; int input_rotate; - int wordmask = r->poolinfo->poolwords - 1; const unsigned char *bytes = in; __u32 w; - tap1 = r->poolinfo->tap1; - tap2 = r->poolinfo->tap2; - tap3 = r->poolinfo->tap3; - tap4 = r->poolinfo->tap4; - tap5 = r->poolinfo->tap5; - input_rotate = r->input_rotate; i = r->add_ptr; /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { w = rol32(*bytes++, input_rotate); - i = (i - 1) & wordmask; + i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ w ^= r->pool[i]; - w ^= r->pool[(i + tap1) & wordmask]; - w ^= r->pool[(i + tap2) & wordmask]; - w ^= r->pool[(i + tap3) & wordmask]; - w ^= r->pool[(i + tap4) & wordmask]; - w ^= r->pool[(i + tap5) & wordmask]; + w ^= r->pool[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= r->pool[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ r->pool[i] = (w >> 3) ^ twist_table[w & 7]; @@ -672,7 +669,6 @@ static void process_random_ready_list(void) static void credit_entropy_bits(struct entropy_store *r, int nbits) { int entropy_count, orig; - const int pool_size = r->poolinfo->poolfracbits; int nfrac = nbits << ENTROPY_SHIFT; if (!nbits) @@ -706,25 +702,25 @@ retry: * turns no matter how large nbits is. */ int pnfrac = nfrac; - const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2; + const int s = POOL_BITSHIFT + ENTROPY_SHIFT + 2; /* The +2 corresponds to the /4 in the denominator */ do { - unsigned int anfrac = min(pnfrac, pool_size/2); + unsigned int anfrac = min(pnfrac, POOL_FRACBITS/2); unsigned int add = - ((pool_size - entropy_count)*anfrac*3) >> s; + ((POOL_FRACBITS - entropy_count)*anfrac*3) >> s; entropy_count += add; pnfrac -= anfrac; - } while (unlikely(entropy_count < pool_size-2 && pnfrac)); + } while (unlikely(entropy_count < POOL_FRACBITS-2 && pnfrac)); } if (WARN_ON(entropy_count < 0)) { pr_warn("negative entropy/overflow: pool %s count %d\n", r->name, entropy_count); entropy_count = 0; - } else if (entropy_count > pool_size) - entropy_count = pool_size; + } else if (entropy_count > POOL_FRACBITS) + entropy_count = POOL_FRACBITS; if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) goto retry; @@ -741,13 +737,11 @@ retry: static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) { - const int nbits_max = r->poolinfo->poolwords * 32; - if (nbits < 0) return -EINVAL; /* Cap the value to avoid overflows */ - nbits = min(nbits, nbits_max); + nbits = min(nbits, POOL_BITS); credit_entropy_bits(r, nbits); return 0; @@ -1343,7 +1337,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int entropy_count, orig, have_bytes; size_t ibytes, nfrac; - BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); + BUG_ON(r->entropy_count > POOL_FRACBITS); /* Can we pull enough? */ retry: @@ -1409,8 +1403,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out) /* Generate a hash across the pool */ spin_lock_irqsave(&r->lock, flags); - blake2s_update(&state, (const u8 *)r->pool, - r->poolinfo->poolwords * sizeof(*r->pool)); + blake2s_update(&state, (const u8 *)r->pool, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* @@ -1766,7 +1759,7 @@ static void __init init_std_data(struct entropy_store *r) unsigned long rv; mix_pool_bytes(r, &now, sizeof(now)); - for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) { + for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); -- GitLab From 8aa9ddc4db2947c50568cfd222bdf16b3a076f6d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 9 Jan 2022 17:48:58 +0100 Subject: [PATCH 0158/4335] random: cleanup integer types commit d38bb0853589c939573ea50e9cb64f733e0e273d upstream. Rather than using the userspace type, __uXX, switch to using uXX. And rather than using variously chosen `char *` or `unsigned char *`, use `u8 *` uniformly for things that aren't strings, in the case where we are doing byte-by-byte traversal. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 105 +++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 55d70a3981d38..6f260780d6639 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -456,7 +456,7 @@ static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); struct crng_state { - __u32 state[16]; + u32 state[16]; unsigned long init_time; spinlock_t lock; }; @@ -483,9 +483,9 @@ static bool crng_need_final_init = false; static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); +static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used); + u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -509,16 +509,16 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); struct entropy_store; struct entropy_store { /* read-only data: */ - __u32 *pool; + u32 *pool; const char *name; /* read-write data: */ spinlock_t lock; - unsigned short add_ptr; - unsigned short input_rotate; + u16 add_ptr; + u16 input_rotate; int entropy_count; unsigned int last_data_init:1; - __u8 last_data[EXTRACT_SIZE]; + u8 last_data[EXTRACT_SIZE]; }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, @@ -527,7 +527,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int fips); static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; +static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { .name = "input", @@ -535,7 +535,7 @@ static struct entropy_store input_pool = { .pool = input_pool_data }; -static __u32 const twist_table[8] = { +static u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -554,8 +554,8 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, { unsigned long i; int input_rotate; - const unsigned char *bytes = in; - __u32 w; + const u8 *bytes = in; + u32 w; input_rotate = r->input_rotate; i = r->add_ptr; @@ -608,10 +608,10 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, } struct fast_pool { - __u32 pool[4]; + u32 pool[4]; unsigned long last; - unsigned short reg_idx; - unsigned char count; + u16 reg_idx; + u8 count; }; /* @@ -621,8 +621,8 @@ struct fast_pool { */ static void fast_mix(struct fast_pool *f) { - __u32 a = f->pool[0], b = f->pool[1]; - __u32 c = f->pool[2], d = f->pool[3]; + u32 a = f->pool[0], b = f->pool[1]; + u32 c = f->pool[2], d = f->pool[3]; a += b; c += d; b = rol32(b, 6); d = rol32(d, 27); @@ -814,14 +814,14 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) static void crng_initialize_secondary(struct crng_state *crng) { chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); + _get_random_bytes(&crng->state[4], sizeof(u32) * 12); crng_init_try_arch(crng); crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); + _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12, 0); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -911,10 +911,10 @@ static struct crng_state *select_crng(void) * path. So we can't afford to dilly-dally. Returns the number of * bytes processed from cp. */ -static size_t crng_fast_load(const char *cp, size_t len) +static size_t crng_fast_load(const u8 *cp, size_t len) { unsigned long flags; - char *p; + u8 *p; size_t ret = 0; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) @@ -923,7 +923,7 @@ static size_t crng_fast_load(const char *cp, size_t len) spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } - p = (unsigned char *) &primary_crng.state[4]; + p = (u8 *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; @@ -951,14 +951,14 @@ static size_t crng_fast_load(const char *cp, size_t len) * like a fixed DMI table (for example), which might very well be * unique to the machine, but is otherwise unvarying. */ -static int crng_slow_load(const char *cp, size_t len) +static int crng_slow_load(const u8 *cp, size_t len) { unsigned long flags; - static unsigned char lfsr = 1; - unsigned char tmp; - unsigned i, max = CHACHA_KEY_SIZE; - const char * src_buf = cp; - char * dest_buf = (char *) &primary_crng.state[4]; + static u8 lfsr = 1; + u8 tmp; + unsigned int i, max = CHACHA_KEY_SIZE; + const u8 * src_buf = cp; + u8 * dest_buf = (u8 *) &primary_crng.state[4]; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -987,8 +987,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA_BLOCK_SIZE]; - __u32 key[8]; + u8 block[CHACHA_BLOCK_SIZE]; + u32 key[8]; } buf; if (r) { @@ -1015,7 +1015,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA_BLOCK_SIZE]) + u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; @@ -1033,7 +1033,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) { _extract_crng(select_crng(), out); } @@ -1043,26 +1043,26 @@ static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used) + u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; - __u32 *s, *d; + u32 *s, *d; int i; - used = round_up(used, sizeof(__u32)); + used = round_up(used, sizeof(u32)); if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { extract_crng(tmp); used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = (__u32 *) &tmp[used]; + s = (u32 *) &tmp[used]; d = &crng->state[4]; for (i=0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) { _crng_backtrack_protect(select_crng(), tmp, used); } @@ -1070,7 +1070,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1158,8 +1158,8 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) struct entropy_store *r; struct { long jiffies; - unsigned cycles; - unsigned num; + unsigned int cycles; + unsigned int num; } sample; long delta, delta2, delta3; @@ -1241,15 +1241,15 @@ static void add_interrupt_bench(cycles_t start) #define add_interrupt_bench(x) #endif -static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) +static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { - __u32 *ptr = (__u32 *) regs; + u32 *ptr = (u32 *) regs; unsigned int idx; if (regs == NULL) return 0; idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) + if (idx >= sizeof(struct pt_regs) / sizeof(u32)) idx = 0; ptr += idx++; WRITE_ONCE(f->reg_idx, idx); @@ -1263,8 +1263,8 @@ void add_interrupt_randomness(int irq) struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); - __u32 c_high, j_high; - __u64 ip; + u32 c_high, j_high; + u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1282,8 +1282,7 @@ void add_interrupt_randomness(int irq) if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && - crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool)) > 0) { + crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; } @@ -1380,7 +1379,7 @@ retry: * * Note: we assume that .poolwords is a multiple of 16 words. */ -static void extract_buf(struct entropy_store *r, __u8 *out) +static void extract_buf(struct entropy_store *r, u8 *out) { struct blake2s_state state __aligned(__alignof__(unsigned long)); u8 hash[BLAKE2S_HASH_SIZE]; @@ -1430,7 +1429,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int fips) { ssize_t ret = 0, i; - __u8 tmp[EXTRACT_SIZE]; + u8 tmp[EXTRACT_SIZE]; unsigned long flags; while (nbytes) { @@ -1468,7 +1467,7 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { - __u8 tmp[EXTRACT_SIZE]; + u8 tmp[EXTRACT_SIZE]; unsigned long flags; /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ @@ -1530,7 +1529,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); @@ -1724,7 +1723,7 @@ EXPORT_SYMBOL(del_random_ready_callback); int __must_check get_random_bytes_arch(void *buf, int nbytes) { int left = nbytes; - char *p = buf; + u8 *p = buf; trace_get_random_bytes_arch(left, _RET_IP_); while (left) { @@ -1866,7 +1865,7 @@ static int write_pool(struct entropy_store *r, const char __user *buffer, size_t count) { size_t bytes; - __u32 t, buf[16]; + u32 t, buf[16]; const char __user *p = buffer; while (count > 0) { @@ -1876,7 +1875,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) if (copy_from_user(&buf, p, bytes)) return -EFAULT; - for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { + for (b = bytes; b > 0; b -= sizeof(u32), i++) { if (!arch_get_random_int(&t)) break; buf[i] ^= t; -- GitLab From 578fbb760064aa30fc4940a9f0a9eed96d6a36b7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 15:22:30 +0100 Subject: [PATCH 0159/4335] random: remove incomplete last_data logic commit a4bfa9b31802c14ff5847123c12b98d5e36b3985 upstream. There were a few things added under the "if (fips_enabled)" banner, which never really got completed, and the FIPS people anyway are choosing a different direction. Rather than keep around this halfbaked code, get rid of it so that we can focus on a single design of the RNG rather than two designs. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6f260780d6639..e821efa0d161d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -337,7 +337,6 @@ #include #include #include -#include #include #include #include @@ -517,14 +516,12 @@ struct entropy_store { u16 add_ptr; u16 input_rotate; int entropy_count; - unsigned int last_data_init:1; - u8 last_data[EXTRACT_SIZE]; }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int rsvd); static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips); + size_t nbytes); static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; @@ -821,7 +818,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12, 0); + _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -1426,22 +1423,13 @@ static void extract_buf(struct entropy_store *r, u8 *out) } static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips) + size_t nbytes) { ssize_t ret = 0, i; u8 tmp[EXTRACT_SIZE]; - unsigned long flags; while (nbytes) { extract_buf(r, tmp); - - if (fips) { - spin_lock_irqsave(&r->lock, flags); - if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) - panic("Hardware RNG duplicated output!\n"); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - spin_unlock_irqrestore(&r->lock, flags); - } i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -1467,28 +1455,9 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { - u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ - if (fips_enabled) { - spin_lock_irqsave(&r->lock, flags); - if (!r->last_data_init) { - r->last_data_init = 1; - spin_unlock_irqrestore(&r->lock, flags); - trace_extract_entropy(r->name, EXTRACT_SIZE, - ENTROPY_BITS(r), _RET_IP_); - extract_buf(r, tmp); - spin_lock_irqsave(&r->lock, flags); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - } - spin_unlock_irqrestore(&r->lock, flags); - } - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); nbytes = account(r, nbytes, min, reserved); - - return _extract_entropy(r, buf, nbytes, fips_enabled); + return _extract_entropy(r, buf, nbytes); } #define warn_unseeded_randomness(previous) \ -- GitLab From b6d849d0e2f893e58529f7d8c78680d4d1316c6a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 15:28:21 +0100 Subject: [PATCH 0160/4335] random: remove unused extract_entropy() reserved argument commit 8b2d953b91e7f60200c24067ab17b77cc7bfd0d4 upstream. This argument is always set to zero, as a result of us not caring about keeping a certain amount reserved in the pool these days. So just remove it and cleanup the function signatures. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e821efa0d161d..eefd335b69821 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -519,7 +519,7 @@ struct entropy_store { }; static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int rsvd); + size_t nbytes, int min); static ssize_t _extract_entropy(struct entropy_store *r, void *buf, size_t nbytes); @@ -989,7 +989,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } buf; if (r) { - num = extract_entropy(r, &buf, 32, 16, 0); + num = extract_entropy(r, &buf, 32, 16); if (num == 0) return; } else { @@ -1327,8 +1327,7 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * This function decides how many bytes to actually take from the * given pool, and also debits the entropy count accordingly. */ -static size_t account(struct entropy_store *r, size_t nbytes, int min, - int reserved) +static size_t account(struct entropy_store *r, size_t nbytes, int min) { int entropy_count, orig, have_bytes; size_t ibytes, nfrac; @@ -1342,7 +1341,7 @@ retry: /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - if ((have_bytes -= reserved) < 0) + if (have_bytes < 0) have_bytes = 0; ibytes = min_t(size_t, ibytes, have_bytes); if (ibytes < min) @@ -1448,15 +1447,13 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, * returns it in a buffer. * * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding while the - * reserved parameter indicates how much entropy we must leave in the - * pool after each pull to avoid starving other readers. + * failing to avoid races that defeat catastrophic reseeding. */ static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) + size_t nbytes, int min) { trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - nbytes = account(r, nbytes, min, reserved); + nbytes = account(r, nbytes, min); return _extract_entropy(r, buf, nbytes); } -- GitLab From f9efa1a988316514729a6446b825406bfcadb7d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 17:18:08 +0100 Subject: [PATCH 0161/4335] random: rather than entropy_store abstraction, use global commit 90ed1e67e896cc8040a523f8428fc02f9b164394 upstream. Originally, the RNG used several pools, so having things abstracted out over a generic entropy_store object made sense. These days, there's only one input pool, and then an uneven mix of usage via the abstraction and usage via &input_pool. Rather than this uneasy mixture, just get rid of the abstraction entirely and have things always use the global. This simplifies the code and makes reading it a bit easier. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 219 +++++++++++++++------------------- include/trace/events/random.h | 56 ++++----- 2 files changed, 117 insertions(+), 158 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index eefd335b69821..be103e3dfe24c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -375,7 +375,7 @@ * credit_entropy_bits() needs to be 64 bits wide. */ #define ENTROPY_SHIFT 3 -#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) +#define ENTROPY_BITS() (input_pool.entropy_count >> ENTROPY_SHIFT) /* * If the entropy count falls under this number of bits, then we @@ -505,33 +505,27 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -struct entropy_store; -struct entropy_store { +static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; + +static struct { /* read-only data: */ u32 *pool; - const char *name; /* read-write data: */ spinlock_t lock; u16 add_ptr; u16 input_rotate; int entropy_count; -}; - -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min); -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes); - -static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; - -static struct entropy_store input_pool = { - .name = "input", +} input_pool = { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; +static ssize_t extract_entropy(void *buf, size_t nbytes, int min); +static ssize_t _extract_entropy(void *buf, size_t nbytes); + +static void crng_reseed(struct crng_state *crng, bool use_input_pool); + static u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -546,16 +540,15 @@ static u32 const twist_table[8] = { * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. */ -static void _mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void _mix_pool_bytes(const void *in, int nbytes) { unsigned long i; int input_rotate; const u8 *bytes = in; u32 w; - input_rotate = r->input_rotate; - i = r->add_ptr; + input_rotate = input_pool.input_rotate; + i = input_pool.add_ptr; /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { @@ -563,15 +556,15 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ - w ^= r->pool[i]; - w ^= r->pool[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= r->pool[(i + POOL_TAP5) & POOL_WORDMASK]; + w ^= input_pool.pool[i]; + w ^= input_pool.pool[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= input_pool.pool[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ - r->pool[i] = (w >> 3) ^ twist_table[w & 7]; + input_pool.pool[i] = (w >> 3) ^ twist_table[w & 7]; /* * Normally, we add 7 bits of rotation to the pool. @@ -582,26 +575,24 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, input_rotate = (input_rotate + (i ? 7 : 14)) & 31; } - r->input_rotate = input_rotate; - r->add_ptr = i; + input_pool.input_rotate = input_rotate; + input_pool.add_ptr = i; } -static void __mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void __mix_pool_bytes(const void *in, int nbytes) { - trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_); - _mix_pool_bytes(r, in, nbytes); + trace_mix_pool_bytes_nolock(nbytes, _RET_IP_); + _mix_pool_bytes(in, nbytes); } -static void mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +static void mix_pool_bytes(const void *in, int nbytes) { unsigned long flags; - trace_mix_pool_bytes(r->name, nbytes, _RET_IP_); - spin_lock_irqsave(&r->lock, flags); - _mix_pool_bytes(r, in, nbytes); - spin_unlock_irqrestore(&r->lock, flags); + trace_mix_pool_bytes(nbytes, _RET_IP_); + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); } struct fast_pool { @@ -663,16 +654,16 @@ static void process_random_ready_list(void) * Use credit_entropy_bits_safe() if the value comes from userspace * or otherwise should be checked for extreme values. */ -static void credit_entropy_bits(struct entropy_store *r, int nbits) +static void credit_entropy_bits(int nbits) { - int entropy_count, orig; + int entropy_count, entropy_bits, orig; int nfrac = nbits << ENTROPY_SHIFT; if (!nbits) return; retry: - entropy_count = orig = READ_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(input_pool.entropy_count); if (nfrac < 0) { /* Debit */ entropy_count += nfrac; @@ -713,26 +704,21 @@ retry: } if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: pool %s count %d\n", - r->name, entropy_count); + pr_warn("negative entropy/overflow: count %d\n", entropy_count); entropy_count = 0; } else if (entropy_count > POOL_FRACBITS) entropy_count = POOL_FRACBITS; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_credit_entropy_bits(r->name, nbits, - entropy_count >> ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count >> ENTROPY_SHIFT, _RET_IP_); - if (r == &input_pool) { - int entropy_bits = entropy_count >> ENTROPY_SHIFT; - - if (crng_init < 2 && entropy_bits >= 128) - crng_reseed(&primary_crng, r); - } + entropy_bits = entropy_count >> ENTROPY_SHIFT; + if (crng_init < 2 && entropy_bits >= 128) + crng_reseed(&primary_crng, true); } -static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) +static int credit_entropy_bits_safe(int nbits) { if (nbits < 0) return -EINVAL; @@ -740,7 +726,7 @@ static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) /* Cap the value to avoid overflows */ nbits = min(nbits, POOL_BITS); - credit_entropy_bits(r, nbits); + credit_entropy_bits(nbits); return 0; } @@ -818,7 +804,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(struct crng_state *crng) { - _extract_entropy(&input_pool, &crng->state[4], sizeof(u32) * 12); + _extract_entropy(&crng->state[4], sizeof(u32) * 12); if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -979,7 +965,7 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng, struct entropy_store *r) +static void crng_reseed(struct crng_state *crng, bool use_input_pool) { unsigned long flags; int i, num; @@ -988,8 +974,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) u32 key[8]; } buf; - if (r) { - num = extract_entropy(r, &buf, 32, 16); + if (use_input_pool) { + num = extract_entropy(&buf, 32, 16); if (num == 0) return; } else { @@ -1020,8 +1006,7 @@ static void _extract_crng(struct crng_state *crng, init_time = READ_ONCE(crng->init_time); if (time_after(READ_ONCE(crng_global_init_time), init_time) || time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng ? - &input_pool : NULL); + crng_reseed(crng, crng == &primary_crng); } spin_lock_irqsave(&crng->lock, flags); chacha20_block(&crng->state[0], out); @@ -1132,8 +1117,8 @@ void add_device_randomness(const void *buf, unsigned int size) trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&input_pool, buf, size); - _mix_pool_bytes(&input_pool, &time, sizeof(time)); + _mix_pool_bytes(buf, size); + _mix_pool_bytes(&time, sizeof(time)); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); @@ -1152,7 +1137,6 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; */ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) { - struct entropy_store *r; struct { long jiffies; unsigned int cycles; @@ -1163,8 +1147,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; - r = &input_pool; - mix_pool_bytes(r, &sample, sizeof(sample)); + mix_pool_bytes(&sample, sizeof(sample)); /* * Calculate number of bits of randomness we probably added. @@ -1196,7 +1179,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); + credit_entropy_bits(min_t(int, fls(delta>>1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, @@ -1211,7 +1194,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS(&input_pool)); + trace_add_input_randomness(ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1255,7 +1238,6 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) void add_interrupt_randomness(int irq) { - struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; @@ -1290,18 +1272,17 @@ void add_interrupt_randomness(int irq) !time_after(now, fast_pool->last + HZ)) return; - r = &input_pool; - if (!spin_trylock(&r->lock)) + if (!spin_trylock(&input_pool.lock)) return; fast_pool->last = now; - __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); - spin_unlock(&r->lock); + __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + spin_unlock(&input_pool.lock); fast_pool->count = 0; /* award one bit for the contents of the fast pool */ - credit_entropy_bits(r, 1); + credit_entropy_bits(1); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); @@ -1312,7 +1293,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); + trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1327,16 +1308,16 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * This function decides how many bytes to actually take from the * given pool, and also debits the entropy count accordingly. */ -static size_t account(struct entropy_store *r, size_t nbytes, int min) +static size_t account(size_t nbytes, int min) { int entropy_count, orig, have_bytes; size_t ibytes, nfrac; - BUG_ON(r->entropy_count > POOL_FRACBITS); + BUG_ON(input_pool.entropy_count > POOL_FRACBITS); /* Can we pull enough? */ retry: - entropy_count = orig = READ_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(input_pool.entropy_count); ibytes = nbytes; /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); @@ -1348,8 +1329,7 @@ retry: ibytes = 0; if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: pool %s count %d\n", - r->name, entropy_count); + pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } nfrac = ibytes << (ENTROPY_SHIFT + 3); @@ -1358,11 +1338,11 @@ retry: else entropy_count = 0; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_debit_entropy(r->name, 8 * ibytes); - if (ibytes && ENTROPY_BITS(r) < random_write_wakeup_bits) { + trace_debit_entropy(8 * ibytes); + if (ibytes && ENTROPY_BITS() < random_write_wakeup_bits) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1375,7 +1355,7 @@ retry: * * Note: we assume that .poolwords is a multiple of 16 words. */ -static void extract_buf(struct entropy_store *r, u8 *out) +static void extract_buf(u8 *out) { struct blake2s_state state __aligned(__alignof__(unsigned long)); u8 hash[BLAKE2S_HASH_SIZE]; @@ -1397,8 +1377,8 @@ static void extract_buf(struct entropy_store *r, u8 *out) } /* Generate a hash across the pool */ - spin_lock_irqsave(&r->lock, flags); - blake2s_update(&state, (const u8 *)r->pool, POOL_BYTES); + spin_lock_irqsave(&input_pool.lock, flags); + blake2s_update(&state, (const u8 *)input_pool.pool, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* @@ -1410,8 +1390,8 @@ static void extract_buf(struct entropy_store *r, u8 *out) * brute-forcing the feedback as hard as brute-forcing the * hash. */ - __mix_pool_bytes(r, hash, sizeof(hash)); - spin_unlock_irqrestore(&r->lock, flags); + __mix_pool_bytes(hash, sizeof(hash)); + spin_unlock_irqrestore(&input_pool.lock, flags); /* Note that EXTRACT_SIZE is half of hash size here, because above * we've dumped the full length back into mixer. By reducing the @@ -1421,14 +1401,13 @@ static void extract_buf(struct entropy_store *r, u8 *out) memzero_explicit(hash, sizeof(hash)); } -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes) +static ssize_t _extract_entropy(void *buf, size_t nbytes) { ssize_t ret = 0, i; u8 tmp[EXTRACT_SIZE]; while (nbytes) { - extract_buf(r, tmp); + extract_buf(tmp); i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -1449,12 +1428,11 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf, * The min parameter specifies the minimum amount we can pull before * failing to avoid races that defeat catastrophic reseeding. */ -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min) +static ssize_t extract_entropy(void *buf, size_t nbytes, int min) { - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - nbytes = account(r, nbytes, min); - return _extract_entropy(r, buf, nbytes); + trace_extract_entropy(nbytes, ENTROPY_BITS(), _RET_IP_); + nbytes = account(nbytes, min); + return _extract_entropy(buf, nbytes); } #define warn_unseeded_randomness(previous) \ @@ -1539,7 +1517,7 @@ EXPORT_SYMBOL(get_random_bytes); */ static void entropy_timer(struct timer_list *t) { - credit_entropy_bits(&input_pool, 1); + credit_entropy_bits(1); } /* @@ -1563,14 +1541,14 @@ static void try_to_generate_entropy(void) while (!crng_ready()) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies+1); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.now, sizeof(stack.now)); schedule(); stack.now = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.now, sizeof(stack.now)); } /* @@ -1711,26 +1689,24 @@ EXPORT_SYMBOL(get_random_bytes_arch); /* * init_std_data - initialize pool with system data * - * @r: pool to initialize - * * This function clears the pool's entropy count and mixes some system * data into the pool to prepare it for use. The pool is not cleared * as that can only decrease the entropy in the pool. */ -static void __init init_std_data(struct entropy_store *r) +static void __init init_std_data(void) { int i; ktime_t now = ktime_get_real(); unsigned long rv; - mix_pool_bytes(r, &now, sizeof(now)); + mix_pool_bytes(&now, sizeof(now)); for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); - mix_pool_bytes(r, &rv, sizeof(rv)); + mix_pool_bytes(&rv, sizeof(rv)); } - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); } /* @@ -1745,7 +1721,7 @@ static void __init init_std_data(struct entropy_store *r) */ int __init rand_initialize(void) { - init_std_data(&input_pool); + init_std_data(); if (crng_need_final_init) crng_finalize_init(&primary_crng); crng_initialize_primary(&primary_crng); @@ -1782,7 +1758,7 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool)); + trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS()); return ret; } @@ -1822,13 +1798,13 @@ random_poll(struct file *file, poll_table * wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) + if (ENTROPY_BITS() < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static int -write_pool(struct entropy_store *r, const char __user *buffer, size_t count) +write_pool(const char __user *buffer, size_t count) { size_t bytes; u32 t, buf[16]; @@ -1850,7 +1826,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) count -= bytes; p += bytes; - mix_pool_bytes(r, buf, bytes); + mix_pool_bytes(buf, bytes); cond_resched(); } @@ -1862,7 +1838,7 @@ static ssize_t random_write(struct file *file, const char __user *buffer, { size_t ret; - ret = write_pool(&input_pool, buffer, count); + ret = write_pool(buffer, count); if (ret) return ret; @@ -1878,7 +1854,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(&input_pool); + ent_count = ENTROPY_BITS(); if (put_user(ent_count, p)) return -EFAULT; return 0; @@ -1887,7 +1863,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - return credit_entropy_bits_safe(&input_pool, ent_count); + return credit_entropy_bits_safe(ent_count); case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1897,11 +1873,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EINVAL; if (get_user(size, p++)) return -EFAULT; - retval = write_pool(&input_pool, (const char __user *)p, - size); + retval = write_pool((const char __user *)p, size); if (retval < 0) return retval; - return credit_entropy_bits_safe(&input_pool, ent_count); + return credit_entropy_bits_safe(ent_count); case RNDZAPENTCNT: case RNDCLEARPOOL: /* @@ -1920,7 +1895,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng, &input_pool); + crng_reseed(&primary_crng, true); WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: @@ -2244,11 +2219,9 @@ randomize_page(unsigned long start, unsigned long range) void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy) { - struct entropy_store *poolp = &input_pool; - if (unlikely(crng_init == 0)) { size_t ret = crng_fast_load(buffer, count); - mix_pool_bytes(poolp, buffer, ret); + mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; if (!count || crng_init == 0) @@ -2261,9 +2234,9 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible(random_write_wait, !system_wq || kthread_should_stop() || - ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); - mix_pool_bytes(poolp, buffer, count); - credit_entropy_bits(poolp, entropy); + ENTROPY_BITS() <= random_write_wakeup_bits); + mix_pool_bytes(buffer, count); + credit_entropy_bits(entropy); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 3d7b432ca5f31..a2d9aa16a5d7a 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -28,80 +28,71 @@ TRACE_EVENT(add_device_randomness, ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP), + TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), - TP_printk("%s pool: bytes %d caller %pS", - __entry->pool_name, __entry->bytes, (void *)__entry->IP) + TP_printk("input pool: bytes %d caller %pS", + __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(const char *pool_name, int bits, int entropy_count, - unsigned long IP), + TP_PROTO(int bits, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, bits, entropy_count, IP), + TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: bits %d entropy_count %d caller %pS", - __entry->pool_name, __entry->bits, - __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: bits %d entropy_count %d caller %pS", + __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(debit_entropy, - TP_PROTO(const char *pool_name, int debit_bits), + TP_PROTO(int debit_bits), - TP_ARGS(pool_name, debit_bits), + TP_ARGS( debit_bits), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), - TP_printk("%s: debit_bits %d", __entry->pool_name, - __entry->debit_bits) + TP_printk("input pool: debit_bits %d", __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, @@ -170,36 +161,31 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", - __entry->pool_name, __entry->nbytes, __entry->entropy_count, - (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d caller %pS", + __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count, IP) ); TRACE_EVENT(urandom_read, -- GitLab From da80b44cf9cfb131e54926d7bc6230446e9f9100 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 15:51:06 +0100 Subject: [PATCH 0162/4335] random: remove unused OUTPUT_POOL constants commit 0f63702718c91d89c922081ac1e6baeddc2d8b1a upstream. We no longer have an output pool. Rather, we have just a wakeup bits threshold for /dev/random reads, presumably so that processes don't hang. This value, random_write_wakeup_bits, is configurable anyway. So all the no longer usefully named OUTPUT_POOL constants were doing was setting a reasonable default for random_write_wakeup_bits. This commit gets rid of the constants and just puts it all in the default value of random_write_wakeup_bits. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index be103e3dfe24c..6aaacf6df7eb2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -363,8 +363,6 @@ */ #define INPUT_POOL_SHIFT 12 #define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define OUTPUT_POOL_SHIFT 10 -#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) #define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) /* @@ -382,7 +380,7 @@ * should wake up processes which are selecting or polling on write * access to /dev/random. */ -static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; +static int random_write_wakeup_bits = 28 * (1 << 5); /* * Originally, we used a primitive polynomial of degree .poolwords -- GitLab From 2772ba4d52235ae2b45140c7bbcfdda23c6a3879 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 16:11:21 +0100 Subject: [PATCH 0163/4335] random: de-duplicate INPUT_POOL constants commit 5b87adf30f1464477169a1d653e9baf8c012bbfe upstream. We already had the POOL_* constants, so deduplicate the older INPUT_POOL ones. As well, fold EXTRACT_SIZE into the poolinfo enum, since it's related. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6aaacf6df7eb2..cd4935a9847a0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -358,13 +358,6 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * Configuration information - */ -#define INPUT_POOL_SHIFT 12 -#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2) - /* * To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. @@ -440,7 +433,9 @@ enum poolinfo { POOL_TAP2 = 76, POOL_TAP3 = 51, POOL_TAP4 = 25, - POOL_TAP5 = 1 + POOL_TAP5 = 1, + + EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2 }; /* @@ -503,7 +498,7 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -static u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; +static u32 input_pool_data[POOL_WORDS] __latent_entropy; static struct { /* read-only data: */ @@ -1964,7 +1959,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, #include static int min_write_thresh; -static int max_write_thresh = INPUT_POOL_WORDS * 32; +static int max_write_thresh = POOL_BITS; static int random_min_urandom_seed = 60; static char sysctl_bootid[16]; @@ -2021,7 +2016,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, return proc_dointvec(&fake_table, write, buffer, lenp, ppos); } -static int sysctl_poolsize = INPUT_POOL_WORDS * 32; +static int sysctl_poolsize = POOL_BITS; extern struct ctl_table random_table[]; struct ctl_table random_table[] = { { -- GitLab From 68512942c0d70d43ed6050f0a66bc1b027cbd0b4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Jan 2022 16:48:35 +0100 Subject: [PATCH 0164/4335] random: prepend remaining pool constants with POOL_ commit b3d51c1f542113342ddfbf6007e38a684b9dbec9 upstream. The other pool constants are prepended with POOL_, but not these last ones. Rename them. This will then let us move them into the enum in the following commit. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index cd4935a9847a0..844474b8319ef 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -362,11 +362,11 @@ * To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. * - * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in + * 2*(POOL_ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in * credit_entropy_bits() needs to be 64 bits wide. */ -#define ENTROPY_SHIFT 3 -#define ENTROPY_BITS() (input_pool.entropy_count >> ENTROPY_SHIFT) +#define POOL_ENTROPY_SHIFT 3 +#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) /* * If the entropy count falls under this number of bits, then we @@ -426,7 +426,7 @@ enum poolinfo { POOL_BYTES = POOL_WORDS * sizeof(u32), POOL_BITS = POOL_BYTES * 8, POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, - POOL_FRACBITS = POOL_WORDS << (ENTROPY_SHIFT + 5), + POOL_FRACBITS = POOL_WORDS << (POOL_ENTROPY_SHIFT + 5), /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ POOL_TAP1 = 104, @@ -650,7 +650,7 @@ static void process_random_ready_list(void) static void credit_entropy_bits(int nbits) { int entropy_count, entropy_bits, orig; - int nfrac = nbits << ENTROPY_SHIFT; + int nfrac = nbits << POOL_ENTROPY_SHIFT; if (!nbits) return; @@ -683,7 +683,7 @@ retry: * turns no matter how large nbits is. */ int pnfrac = nfrac; - const int s = POOL_BITSHIFT + ENTROPY_SHIFT + 2; + const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2; /* The +2 corresponds to the /4 in the denominator */ do { @@ -704,9 +704,9 @@ retry: if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) goto retry; - trace_credit_entropy_bits(nbits, entropy_count >> ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); - entropy_bits = entropy_count >> ENTROPY_SHIFT; + entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT; if (crng_init < 2 && entropy_bits >= 128) crng_reseed(&primary_crng, true); } @@ -1187,7 +1187,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS()); + trace_add_input_randomness(POOL_ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1286,7 +1286,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS()); + trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS()); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1313,7 +1313,7 @@ retry: entropy_count = orig = READ_ONCE(input_pool.entropy_count); ibytes = nbytes; /* never pull more than available */ - have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + have_bytes = entropy_count >> (POOL_ENTROPY_SHIFT + 3); if (have_bytes < 0) have_bytes = 0; @@ -1325,7 +1325,7 @@ retry: pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } - nfrac = ibytes << (ENTROPY_SHIFT + 3); + nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); if ((size_t) entropy_count > nfrac) entropy_count -= nfrac; else @@ -1335,7 +1335,7 @@ retry: goto retry; trace_debit_entropy(8 * ibytes); - if (ibytes && ENTROPY_BITS() < random_write_wakeup_bits) { + if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1423,7 +1423,7 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes) */ static ssize_t extract_entropy(void *buf, size_t nbytes, int min) { - trace_extract_entropy(nbytes, ENTROPY_BITS(), _RET_IP_); + trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); nbytes = account(nbytes, min); return _extract_entropy(buf, nbytes); } @@ -1749,9 +1749,9 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, { int ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); + nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3)); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS()); + trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS()); return ret; } @@ -1791,7 +1791,7 @@ random_poll(struct file *file, poll_table * wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS() < random_write_wakeup_bits) + if (POOL_ENTROPY_BITS() < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1847,7 +1847,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(); + ent_count = POOL_ENTROPY_BITS(); if (put_user(ent_count, p)) return -EFAULT; return 0; @@ -2008,7 +2008,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, struct ctl_table fake_table; int entropy_count; - entropy_count = *(int *)table->data >> ENTROPY_SHIFT; + entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT; fake_table.data = &entropy_count; fake_table.maxlen = sizeof(entropy_count); @@ -2227,7 +2227,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible(random_write_wait, !system_wq || kthread_should_stop() || - ENTROPY_BITS() <= random_write_wakeup_bits); + POOL_ENTROPY_BITS() <= random_write_wakeup_bits); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); } -- GitLab From c25c7e29be27d379a010dde9fed49755d4325df3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 13 Jan 2022 18:18:48 +0100 Subject: [PATCH 0165/4335] random: cleanup fractional entropy shift constants commit 18263c4e8e62f7329f38f5eadc568751242ca89c upstream. The entropy estimator is calculated in terms of 1/8 bits, which means there are various constants where things are shifted by 3. Move these into our pool info enum with the other relevant constants. While we're at it, move an English assertion about sizes into a proper BUILD_BUG_ON so that the compiler can ensure this invariant. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 844474b8319ef..402e3ac7f4a95 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -358,16 +358,6 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. - * - * 2*(POOL_ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in - * credit_entropy_bits() needs to be 64 bits wide. - */ -#define POOL_ENTROPY_SHIFT 3 -#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - /* * If the entropy count falls under this number of bits, then we * should wake up processes which are selecting or polling on write @@ -425,8 +415,13 @@ enum poolinfo { POOL_WORDMASK = POOL_WORDS - 1, POOL_BYTES = POOL_WORDS * sizeof(u32), POOL_BITS = POOL_BYTES * 8, - POOL_BITSHIFT = ilog2(POOL_WORDS) + 5, - POOL_FRACBITS = POOL_WORDS << (POOL_ENTROPY_SHIFT + 5), + POOL_BITSHIFT = ilog2(POOL_BITS), + + /* To allow fractional bits to be tracked, the entropy_count field is + * denominated in units of 1/8th bits. */ + POOL_ENTROPY_SHIFT = 3, +#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ POOL_TAP1 = 104, @@ -652,6 +647,9 @@ static void credit_entropy_bits(int nbits) int entropy_count, entropy_bits, orig; int nfrac = nbits << POOL_ENTROPY_SHIFT; + /* Ensure that the multiplication can avoid being 64 bits wide. */ + BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31); + if (!nbits) return; @@ -687,13 +685,13 @@ retry: /* The +2 corresponds to the /4 in the denominator */ do { - unsigned int anfrac = min(pnfrac, POOL_FRACBITS/2); + unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2); unsigned int add = - ((POOL_FRACBITS - entropy_count)*anfrac*3) >> s; + ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s; entropy_count += add; pnfrac -= anfrac; - } while (unlikely(entropy_count < POOL_FRACBITS-2 && pnfrac)); + } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac)); } if (WARN_ON(entropy_count < 0)) { -- GitLab From db7a0a9171ed6213596448f88e31363e934b1f8c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 15 Jan 2022 14:40:04 +0100 Subject: [PATCH 0166/4335] random: access input_pool_data directly rather than through pointer commit 6c0eace6e1499712583b6ee62d95161e8b3449f5 upstream. This gets rid of another abstraction we no longer need. It would be nice if we could instead make pool an array rather than a pointer, but the latent entropy plugin won't be able to do its magic in that case. So instead we put all accesses to the input pool's actual data through the input_pool_data array directly. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 402e3ac7f4a95..19a3d79836849 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -496,17 +496,12 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); static u32 input_pool_data[POOL_WORDS] __latent_entropy; static struct { - /* read-only data: */ - u32 *pool; - - /* read-write data: */ spinlock_t lock; u16 add_ptr; u16 input_rotate; int entropy_count; } input_pool = { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), - .pool = input_pool_data }; static ssize_t extract_entropy(void *buf, size_t nbytes, int min); @@ -544,15 +539,15 @@ static void _mix_pool_bytes(const void *in, int nbytes) i = (i - 1) & POOL_WORDMASK; /* XOR in the various taps */ - w ^= input_pool.pool[i]; - w ^= input_pool.pool[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= input_pool.pool[(i + POOL_TAP5) & POOL_WORDMASK]; + w ^= input_pool_data[i]; + w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK]; + w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK]; /* Mix the result back in with a twist */ - input_pool.pool[i] = (w >> 3) ^ twist_table[w & 7]; + input_pool_data[i] = (w >> 3) ^ twist_table[w & 7]; /* * Normally, we add 7 bits of rotation to the pool. @@ -1369,7 +1364,7 @@ static void extract_buf(u8 *out) /* Generate a hash across the pool */ spin_lock_irqsave(&input_pool.lock, flags); - blake2s_update(&state, (const u8 *)input_pool.pool, POOL_BYTES); + blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES); blake2s_final(&state, hash); /* final zeros out state */ /* -- GitLab From 2f0e83e8326becfa63ad4845d47c85cc0cbfd223 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 15 Jan 2022 14:57:22 +0100 Subject: [PATCH 0167/4335] random: selectively clang-format where it makes sense commit 248045b8dea5a32ddc0aa44193d6bc70c4b9cd8e upstream. This is an old driver that has seen a lot of different eras of kernel coding style. In an effort to make it easier to code for, unify the coding style around the current norm, by accepting some of -- but certainly not all of -- the suggestions from clang-format. This should remove ambiguity in coding style, especially with regards to spacing, when code is being changed or amended. Consequently it also makes code review easier on the eyes, following one uniform style rather than several. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 209 ++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 110 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 19a3d79836849..1ad9d9993e7be 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -124,7 +124,7 @@ * * The primary kernel interface is * - * void get_random_bytes(void *buf, int nbytes); + * void get_random_bytes(void *buf, int nbytes); * * This interface will return the requested number of random bytes, * and place it in the requested buffer. This is equivalent to a @@ -132,10 +132,10 @@ * * For less critical applications, there are the functions: * - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() * * These are produced by a cryptographic RNG seeded from get_random_bytes, * and so do not deplete the entropy pool as much. These are recommended @@ -197,10 +197,10 @@ * from the devices are: * * void add_device_randomness(const void *buf, unsigned int size); - * void add_input_randomness(unsigned int type, unsigned int code, + * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); * void add_interrupt_randomness(int irq); - * void add_disk_randomness(struct gendisk *disk); + * void add_disk_randomness(struct gendisk *disk); * void add_hwgenerator_randomness(const char *buffer, size_t count, * size_t entropy); * void add_bootloader_randomness(const void *buf, unsigned int size); @@ -296,8 +296,8 @@ * /dev/random and /dev/urandom created already, they can be created * by using the commands: * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 + * mknod /dev/random c 1 8 + * mknod /dev/urandom c 1 9 * * Acknowledgements: * ================= @@ -443,9 +443,9 @@ static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); struct crng_state { - u32 state[16]; - unsigned long init_time; - spinlock_t lock; + u32 state[16]; + unsigned long init_time; + spinlock_t lock; }; static struct crng_state primary_crng = { @@ -469,7 +469,7 @@ static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) +#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, u8 tmp[CHACHA_BLOCK_SIZE], int used); @@ -509,7 +509,7 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); -static u32 const twist_table[8] = { +static const u32 twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -579,10 +579,10 @@ static void mix_pool_bytes(const void *in, int nbytes) } struct fast_pool { - u32 pool[4]; - unsigned long last; - u16 reg_idx; - u8 count; + u32 pool[4]; + unsigned long last; + u16 reg_idx; + u8 count; }; /* @@ -710,7 +710,7 @@ static int credit_entropy_bits_safe(int nbits) return -EINVAL; /* Cap the value to avoid overflows */ - nbits = min(nbits, POOL_BITS); + nbits = min(nbits, POOL_BITS); credit_entropy_bits(nbits); return 0; @@ -722,7 +722,7 @@ static int credit_entropy_bits_safe(int nbits) * *********************************************************************/ -#define CRNG_RESEED_INTERVAL (300*HZ) +#define CRNG_RESEED_INTERVAL (300 * HZ) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); @@ -746,9 +746,9 @@ early_param("random.trust_cpu", parse_trust_cpu); static bool crng_init_try_arch(struct crng_state *crng) { - int i; - bool arch_init = true; - unsigned long rv; + int i; + bool arch_init = true; + unsigned long rv; for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long(&rv) && @@ -764,9 +764,9 @@ static bool crng_init_try_arch(struct crng_state *crng) static bool __init crng_init_try_arch_early(struct crng_state *crng) { - int i; - bool arch_init = true; - unsigned long rv; + int i; + bool arch_init = true; + unsigned long rv; for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long_early(&rv) && @@ -836,7 +836,7 @@ static void do_numa_crng_init(struct work_struct *work) struct crng_state *crng; struct crng_state **pool; - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); + pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL); for_each_online_node(i) { crng = kmalloc_node(sizeof(struct crng_state), GFP_KERNEL | __GFP_NOFAIL, i); @@ -892,7 +892,7 @@ static size_t crng_fast_load(const u8 *cp, size_t len) spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } - p = (u8 *) &primary_crng.state[4]; + p = (u8 *)&primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; @@ -922,12 +922,12 @@ static size_t crng_fast_load(const u8 *cp, size_t len) */ static int crng_slow_load(const u8 *cp, size_t len) { - unsigned long flags; - static u8 lfsr = 1; - u8 tmp; - unsigned int i, max = CHACHA_KEY_SIZE; - const u8 * src_buf = cp; - u8 * dest_buf = (u8 *) &primary_crng.state[4]; + unsigned long flags; + static u8 lfsr = 1; + u8 tmp; + unsigned int i, max = CHACHA_KEY_SIZE; + const u8 *src_buf = cp; + u8 *dest_buf = (u8 *)&primary_crng.state[4]; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -938,7 +938,7 @@ static int crng_slow_load(const u8 *cp, size_t len) if (len > max) max = len; - for (i = 0; i < max ; i++) { + for (i = 0; i < max; i++) { tmp = lfsr; lfsr >>= 1; if (tmp & 1) @@ -953,11 +953,11 @@ static int crng_slow_load(const u8 *cp, size_t len) static void crng_reseed(struct crng_state *crng, bool use_input_pool) { - unsigned long flags; - int i, num; + unsigned long flags; + int i, num; union { - u8 block[CHACHA_BLOCK_SIZE]; - u32 key[8]; + u8 block[CHACHA_BLOCK_SIZE]; + u32 key[8]; } buf; if (use_input_pool) { @@ -971,11 +971,11 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) } spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { - unsigned long rv; + unsigned long rv; if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); - crng->state[i+4] ^= buf.key[i] ^ rv; + crng->state[i + 4] ^= buf.key[i] ^ rv; } memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); @@ -983,8 +983,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) crng_finalize_init(crng); } -static void _extract_crng(struct crng_state *crng, - u8 out[CHACHA_BLOCK_SIZE]) +static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; @@ -1013,9 +1012,9 @@ static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) static void _crng_backtrack_protect(struct crng_state *crng, u8 tmp[CHACHA_BLOCK_SIZE], int used) { - unsigned long flags; - u32 *s, *d; - int i; + unsigned long flags; + u32 *s, *d; + int i; used = round_up(used, sizeof(u32)); if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { @@ -1023,9 +1022,9 @@ static void _crng_backtrack_protect(struct crng_state *crng, used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = (u32 *) &tmp[used]; + s = (u32 *)&tmp[used]; d = &crng->state[4]; - for (i=0; i < 8; i++) + for (i = 0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } @@ -1070,7 +1069,6 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) return ret; } - /********************************************************************* * * Entropy input management @@ -1165,11 +1163,11 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(min_t(int, fls(delta>>1), 11)); + credit_entropy_bits(min_t(int, fls(delta >> 1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) + unsigned int value) { static unsigned char last_value; @@ -1189,19 +1187,19 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness); #ifdef ADD_INTERRUPT_BENCH static unsigned long avg_cycles, avg_deviation; -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT-1)) +#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ +#define FIXED_1_2 (1 << (AVG_SHIFT - 1)) static void add_interrupt_bench(cycles_t start) { - long delta = random_get_entropy() - start; + long delta = random_get_entropy() - start; - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; + /* Use a weighted moving average */ + delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); + avg_cycles += delta; + /* And average deviation */ + delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); + avg_deviation += delta; } #else #define add_interrupt_bench(x) @@ -1209,7 +1207,7 @@ static void add_interrupt_bench(cycles_t start) static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { - u32 *ptr = (u32 *) regs; + u32 *ptr = (u32 *)regs; unsigned int idx; if (regs == NULL) @@ -1224,12 +1222,12 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) void add_interrupt_randomness(int irq) { - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); - u32 c_high, j_high; - u64 ip; + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned long now = jiffies; + cycles_t cycles = random_get_entropy(); + u32 c_high, j_high; + u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1239,8 +1237,8 @@ void add_interrupt_randomness(int irq) fast_pool->pool[1] ^= now ^ c_high; ip = regs ? instruction_pointer(regs) : _RET_IP_; fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); + fast_pool->pool[3] ^= + (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); fast_mix(fast_pool); add_interrupt_bench(cycles); @@ -1254,8 +1252,7 @@ void add_interrupt_randomness(int irq) return; } - if ((fast_pool->count < 64) && - !time_after(now, fast_pool->last + HZ)) + if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) return; if (!spin_trylock(&input_pool.lock)) @@ -1319,7 +1316,7 @@ retry: entropy_count = 0; } nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); - if ((size_t) entropy_count > nfrac) + if ((size_t)entropy_count > nfrac) entropy_count -= nfrac; else entropy_count = 0; @@ -1422,10 +1419,9 @@ static ssize_t extract_entropy(void *buf, size_t nbytes, int min) } #define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous)) + _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) -static void _warn_unseeded_randomness(const char *func_name, void *caller, - void **previous) +static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) { #ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM const bool print_once = false; @@ -1433,8 +1429,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, static bool print_once __read_mostly; #endif - if (print_once || - crng_ready() || + if (print_once || crng_ready() || (previous && (caller == READ_ONCE(*previous)))) return; WRITE_ONCE(*previous, caller); @@ -1442,9 +1437,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } /* @@ -1487,7 +1481,6 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes); - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1526,7 +1519,7 @@ static void try_to_generate_entropy(void) timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready()) { if (!timer_pending(&stack.timer)) - mod_timer(&stack.timer, jiffies+1); + mod_timer(&stack.timer, jiffies + 1); mix_pool_bytes(&stack.now, sizeof(stack.now)); schedule(); stack.now = random_get_entropy(); @@ -1736,9 +1729,8 @@ void rand_initialize_disk(struct gendisk *disk) } #endif -static ssize_t -urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) { int ret; @@ -1748,8 +1740,8 @@ urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, return ret; } -static ssize_t -urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) { static int maxwarn = 10; @@ -1763,8 +1755,8 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return urandom_read_nowarn(file, buf, nbytes, ppos); } -static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) { int ret; @@ -1774,8 +1766,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return urandom_read_nowarn(file, buf, nbytes, ppos); } -static __poll_t -random_poll(struct file *file, poll_table * wait) +static __poll_t random_poll(struct file *file, poll_table *wait) { __poll_t mask; @@ -1789,8 +1780,7 @@ random_poll(struct file *file, poll_table * wait) return mask; } -static int -write_pool(const char __user *buffer, size_t count) +static int write_pool(const char __user *buffer, size_t count) { size_t bytes; u32 t, buf[16]; @@ -1895,9 +1885,9 @@ static int random_fasync(int fd, struct file *filp, int on) } const struct file_operations random_fops = { - .read = random_read, + .read = random_read, .write = random_write, - .poll = random_poll, + .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, @@ -1905,7 +1895,7 @@ const struct file_operations random_fops = { }; const struct file_operations urandom_fops = { - .read = urandom_read, + .read = urandom_read, .write = random_write, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -1913,19 +1903,19 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, - unsigned int, flags) +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, + flags) { int ret; - if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; /* * Requesting insecure and blocking randomness at the same time makes * no sense. */ - if ((flags & (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; if (count > INT_MAX) @@ -1965,8 +1955,8 @@ static char sysctl_bootid[16]; * returned as an ASCII string in the standard UUID format; if via the * sysctl system call, as 16 bytes of binary data. */ -static int proc_do_uuid(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; unsigned char buf[64], tmp_uuid[16], *uuid; @@ -1995,8 +1985,8 @@ static int proc_do_uuid(struct ctl_table *table, int write, /* * Return entropy available scaled to integral bits */ -static int proc_do_entropy(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; int entropy_count; @@ -2073,7 +2063,7 @@ struct ctl_table random_table[] = { #endif { } }; -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL */ struct batched_entropy { union { @@ -2093,7 +2083,7 @@ struct batched_entropy { * point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), }; u64 get_random_u64(void) @@ -2118,7 +2108,7 @@ u64 get_random_u64(void) EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), }; u32 get_random_u32(void) { @@ -2150,7 +2140,7 @@ static void invalidate_batched_entropy(void) int cpu; unsigned long flags; - for_each_possible_cpu (cpu) { + for_each_possible_cpu(cpu) { struct batched_entropy *batched_entropy; batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); @@ -2179,8 +2169,7 @@ static void invalidate_batched_entropy(void) * Return: A page aligned address within [start, start + range). On error, * @start is returned. */ -unsigned long -randomize_page(unsigned long start, unsigned long range) +unsigned long randomize_page(unsigned long start, unsigned long range) { if (!PAGE_ALIGNED(start)) { range -= PAGE_ALIGN(start) - start; -- GitLab From c1dc53d781fd3d5af0d75b053864fc54ef2ad664 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 17 Jan 2022 18:43:02 +0100 Subject: [PATCH 0168/4335] random: simplify arithmetic function flow in account() commit a254a0e4093fce8c832414a83940736067eed515 upstream. Now that have_bytes is never modified, we can simplify this function. First, we move the check for negative entropy_count to be first. That ensures that subsequent reads of this will be non-negative. Then, have_bytes and ibytes can be folded into their one use site in the min_t() function. Suggested-by: Dominik Brodowski Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1ad9d9993e7be..40d6c51e5ef8c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1293,7 +1293,7 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); */ static size_t account(size_t nbytes, int min) { - int entropy_count, orig, have_bytes; + int entropy_count, orig; size_t ibytes, nfrac; BUG_ON(input_pool.entropy_count > POOL_FRACBITS); @@ -1301,20 +1301,15 @@ static size_t account(size_t nbytes, int min) /* Can we pull enough? */ retry: entropy_count = orig = READ_ONCE(input_pool.entropy_count); - ibytes = nbytes; - /* never pull more than available */ - have_bytes = entropy_count >> (POOL_ENTROPY_SHIFT + 3); - - if (have_bytes < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - if (ibytes < min) - ibytes = 0; - if (WARN_ON(entropy_count < 0)) { pr_warn("negative entropy count: count %d\n", entropy_count); entropy_count = 0; } + + /* never pull more than available */ + ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3)); + if (ibytes < min) + ibytes = 0; nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); if ((size_t)entropy_count > nfrac) entropy_count -= nfrac; -- GitLab From b53c9666e7146fdd60b004f0e90d62c1821173a2 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 25 Jan 2022 21:14:57 +0100 Subject: [PATCH 0169/4335] random: continually use hwgenerator randomness commit c321e907aa4803d562d6e70ebed9444ad082f953 upstream. The rngd kernel thread may sleep indefinitely if the entropy count is kept above random_write_wakeup_bits by other entropy sources. To make best use of multiple sources of randomness, mix entropy from hardware RNGs into the pool at least once within CRNG_RESEED_INTERVAL. Cc: Herbert Xu Cc: Jason A. Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 40d6c51e5ef8c..d4111220bbb04 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2198,13 +2198,15 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, return; } - /* Suspend writing if we're above the trickle threshold. + /* Throttle writing if we're above the trickle threshold. * We'll be woken up again once below random_write_wakeup_thresh, - * or when the calling thread is about to terminate. + * when the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has lapsed. */ - wait_event_interruptible(random_write_wait, + wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - POOL_ENTROPY_BITS() <= random_write_wakeup_bits); + POOL_ENTROPY_BITS() <= random_write_wakeup_bits, + CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); } -- GitLab From 07e015230415863e1a10e0f8a8970700c7c62a84 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 30 Jan 2022 22:03:19 +0100 Subject: [PATCH 0170/4335] random: access primary_pool directly rather than through pointer commit ebf7606388732ecf2821ca21087e9446cb4a5b57 upstream. Both crng_initialize_primary() and crng_init_try_arch_early() are only called for the primary_pool. Accessing it directly instead of through a function parameter simplifies the code. Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d4111220bbb04..0685e64b5cb3a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -762,7 +762,7 @@ static bool crng_init_try_arch(struct crng_state *crng) return arch_init; } -static bool __init crng_init_try_arch_early(struct crng_state *crng) +static bool __init crng_init_try_arch_early(void) { int i; bool arch_init = true; @@ -774,7 +774,7 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) rv = random_get_entropy(); arch_init = false; } - crng->state[i] ^= rv; + primary_crng.state[i] ^= rv; } return arch_init; @@ -788,16 +788,16 @@ static void crng_initialize_secondary(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void __init crng_initialize_primary(struct crng_state *crng) +static void __init crng_initialize_primary(void) { - _extract_entropy(&crng->state[4], sizeof(u32) * 12); - if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { + _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; + primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } static void crng_finalize_init(struct crng_state *crng) @@ -1698,7 +1698,7 @@ int __init rand_initialize(void) init_std_data(); if (crng_need_final_init) crng_finalize_init(&primary_crng); - crng_initialize_primary(&primary_crng); + crng_initialize_primary(); crng_global_init_time = jiffies; if (ratelimit_disable) { urandom_warning.interval = 0; -- GitLab From add92df1cb37b5661b28fee5e0591895be5c317f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 30 Jan 2022 22:03:20 +0100 Subject: [PATCH 0171/4335] random: only call crng_finalize_init() for primary_crng commit 9d5505f1eebeca778074a0260ed077fd85f8792c upstream. crng_finalize_init() returns instantly if it is called for another pool than primary_crng. The test whether crng_finalize_init() is still required can be moved to the relevant caller in crng_reseed(), and crng_need_final_init can be reset to false if crng_finalize_init() is called with workqueues ready. Then, no previous callsite will call crng_finalize_init() unless it is needed, and we can get rid of the superfluous function parameter. Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0685e64b5cb3a..6c3eee2a9004e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -800,10 +800,8 @@ static void __init crng_initialize_primary(void) primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void crng_finalize_init(struct crng_state *crng) +static void crng_finalize_init(void) { - if (crng != &primary_crng || crng_init >= 2) - return; if (!system_wq) { /* We can't call numa_crng_init until we have workqueues, * so mark this for processing later. */ @@ -814,6 +812,7 @@ static void crng_finalize_init(struct crng_state *crng) invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; + crng_need_final_init = false; process_random_ready_list(); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); @@ -980,7 +979,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); - crng_finalize_init(crng); + if (crng == &primary_crng && crng_init < 2) + crng_finalize_init(); } static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) @@ -1697,7 +1697,7 @@ int __init rand_initialize(void) { init_std_data(); if (crng_need_final_init) - crng_finalize_init(&primary_crng); + crng_finalize_init(); crng_initialize_primary(); crng_global_init_time = jiffies; if (ratelimit_disable) { -- GitLab From a5e2c8a4574abde1a6dd093e1a8863abe68b792d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 16 Jan 2022 14:23:10 +0100 Subject: [PATCH 0172/4335] random: use computational hash for entropy extraction commit 6e8ec2552c7d13991148e551e3325a624d73fac6 upstream. The current 4096-bit LFSR used for entropy collection had a few desirable attributes for the context in which it was created. For example, the state was huge, which meant that /dev/random would be able to output quite a bit of accumulated entropy before blocking. It was also, in its time, quite fast at accumulating entropy byte-by-byte, which matters given the varying contexts in which mix_pool_bytes() is called. And its diffusion was relatively high, which meant that changes would ripple across several words of state rather quickly. However, it also suffers from a few security vulnerabilities. In particular, inputs learned by an attacker can be undone, but moreover, if the state of the pool leaks, its contents can be controlled and entirely zeroed out. I've demonstrated this attack with this SMT2 script, , which Boolector/CaDiCal solves in a matter of seconds on a single core of my laptop, resulting in little proof of concept C demonstrators such as . For basically all recent formal models of RNGs, these attacks represent a significant cryptographic flaw. But how does this manifest practically? If an attacker has access to the system to such a degree that he can learn the internal state of the RNG, arguably there are other lower hanging vulnerabilities -- side-channel, infoleak, or otherwise -- that might have higher priority. On the other hand, seed files are frequently used on systems that have a hard time generating much entropy on their own, and these seed files, being files, often leak or are duplicated and distributed accidentally, or are even seeded over the Internet intentionally, where their contents might be recorded or tampered with. Seen this way, an otherwise quasi-implausible vulnerability is a bit more practical than initially thought. Another aspect of the current mix_pool_bytes() function is that, while its performance was arguably competitive for the time in which it was created, it's no longer considered so. This patch improves performance significantly: on a high-end CPU, an i7-11850H, it improves performance of mix_pool_bytes() by 225%, and on a low-end CPU, a Cortex-A7, it improves performance by 103%. This commit replaces the LFSR of mix_pool_bytes() with a straight- forward cryptographic hash function, BLAKE2s, which is already in use for pool extraction. Universal hashing with a secret seed was considered too, something along the lines of , but the requirement for a secret seed makes for a chicken & egg problem. Instead we go with a formally proven scheme using a computational hash function, described in sections 5.1, 6.4, and B.1.8 of . BLAKE2s outputs 256 bits, which should give us an appropriate amount of min-entropy accumulation, and a wide enough margin of collision resistance against active attacks. mix_pool_bytes() becomes a simple call to blake2s_update(), for accumulation, while the extraction step becomes a blake2s_final() to generate a seed, with which we can then do a HKDF-like or BLAKE2X-like expansion, the first part of which we fold back as an init key for subsequent blake2s_update()s, and the rest we produce to the caller. This then is provided to our CRNG like usual. In that expansion step, we make opportunistic use of 32 bytes of RDRAND output, just as before. We also always reseed the crng with 32 bytes, unconditionally, or not at all, rather than sometimes with 16 as before, as we don't win anything by limiting beyond the 16 byte threshold. Going for a hash function as an entropy collector is a conservative, proven approach. The result of all this is a much simpler and much less bespoke construction than what's there now, which not only plugs a vulnerability but also improves performance considerably. Cc: Theodore Ts'o Cc: Dominik Brodowski Reviewed-by: Eric Biggers Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 304 ++++++++---------------------------------- 1 file changed, 55 insertions(+), 249 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6c3eee2a9004e..b1d6a9e8bf7ee 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -42,61 +42,6 @@ */ /* - * (now, with legal B.S. out of the way.....) - * - * This routine gathers environmental noise from device drivers, etc., - * and returns good random numbers, suitable for cryptographic use. - * Besides the obvious cryptographic uses, these numbers are also good - * for seeding TCP sequence numbers, and other places where it is - * desirable to have numbers which are not only random, but hard to - * predict by an attacker. - * - * Theory of operation - * =================== - * - * Computers are very predictable devices. Hence it is extremely hard - * to produce truly random numbers on a computer --- as opposed to - * pseudo-random numbers, which can easily generated by using a - * algorithm. Unfortunately, it is very easy for attackers to guess - * the sequence of pseudo-random number generators, and for some - * applications this is not acceptable. So instead, we must try to - * gather "environmental noise" from the computer's environment, which - * must be hard for outside attackers to observe, and use that to - * generate random numbers. In a Unix environment, this is best done - * from inside the kernel. - * - * Sources of randomness from the environment include inter-keyboard - * timings, inter-interrupt timings from some interrupts, and other - * events which are both (a) non-deterministic and (b) hard for an - * outside observer to measure. Randomness from these sources are - * added to an "entropy pool", which is mixed using a CRC-like function. - * This is not cryptographically strong, but it is adequate assuming - * the randomness is not chosen maliciously, and it is fast enough that - * the overhead of doing it on every interrupt is very reasonable. - * As random bytes are mixed into the entropy pool, the routines keep - * an *estimate* of how many bits of randomness have been stored into - * the random number generator's internal state. - * - * When random bytes are desired, they are obtained by taking the BLAKE2s - * hash of the contents of the "entropy pool". The BLAKE2s hash avoids - * exposing the internal state of the entropy pool. It is believed to - * be computationally infeasible to derive any useful information - * about the input of BLAKE2s from its output. Even if it is possible to - * analyze BLAKE2s in some clever way, as long as the amount of data - * returned from the generator is less than the inherent entropy in - * the pool, the output data is totally unpredictable. For this - * reason, the routine decreases its internal estimate of how many - * bits of "true randomness" are contained in the entropy pool as it - * outputs random numbers. - * - * If this estimate goes to zero, the routine can still generate - * random numbers; however, an attacker may (at least in theory) be - * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of BLAKE2s, which is - * not believed to be feasible, but there is a remote possibility. - * Nonetheless, these numbers should be useful for the vast majority - * of purposes. - * * Exported interfaces ---- output * =============================== * @@ -298,23 +243,6 @@ * * mknod /dev/random c 1 8 * mknod /dev/urandom c 1 9 - * - * Acknowledgements: - * ================= - * - * Ideas for constructing this random number generator were derived - * from Pretty Good Privacy's random number generator, and from private - * discussions with Phil Karn. Colin Plumb provided a faster random - * number generator, which speed up the mixing function of the entropy - * pool, taken from PGPfone. Dale Worley has also contributed many - * useful ideas and suggestions to improve this driver. - * - * Any flaws in the design are solely my responsibility, and should - * not be attributed to the Phil, Colin, or any of authors of PGP. - * - * Further background information on this topic may be obtained from - * RFC 1750, "Randomness Recommendations for Security", by Donald - * Eastlake, Steve Crocker, and Jeff Schiller. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -358,79 +286,15 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. - */ -static int random_write_wakeup_bits = 28 * (1 << 5); - -/* - * Originally, we used a primitive polynomial of degree .poolwords - * over GF(2). The taps for various sizes are defined below. They - * were chosen to be evenly spaced except for the last tap, which is 1 - * to get the twisting happening as fast as possible. - * - * For the purposes of better mixing, we use the CRC-32 polynomial as - * well to make a (modified) twisted Generalized Feedback Shift - * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR - * generators. ACM Transactions on Modeling and Computer Simulation - * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted - * GFSR generators II. ACM Transactions on Modeling and Computer - * Simulation 4:254-266) - * - * Thanks to Colin Plumb for suggesting this. - * - * The mixing operation is much less sensitive than the output hash, - * where we use BLAKE2s. All that we want of mixing operation is that - * it be a good non-cryptographic hash; i.e. it not produce collisions - * when fed "random" data of the sort we expect to see. As long as - * the pool state differs for different inputs, we have preserved the - * input entropy and done a good job. The fact that an intelligent - * attacker can construct inputs that will produce controlled - * alterations to the pool's state is not important because we don't - * consider such inputs to contribute any randomness. The only - * property we need with respect to them is that the attacker can't - * increase his/her knowledge of the pool's state. Since all - * additions are reversible (knowing the final state and the input, - * you can reconstruct the initial state), if an attacker has any - * uncertainty about the initial state, he/she can only shuffle that - * uncertainty about, but never cause any collisions (which would - * decrease the uncertainty). - * - * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and - * Videau in their paper, "The Linux Pseudorandom Number Generator - * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their - * paper, they point out that we are not using a true Twisted GFSR, - * since Matsumoto & Kurita used a trinomial feedback polynomial (that - * is, with only three taps, instead of the six that we are using). - * As a result, the resulting polynomial is neither primitive nor - * irreducible, and hence does not have a maximal period over - * GF(2**32). They suggest a slight change to the generator - * polynomial which improves the resulting TGFSR polynomial to be - * irreducible, which we have made here. - */ enum poolinfo { - POOL_WORDS = 128, - POOL_WORDMASK = POOL_WORDS - 1, - POOL_BYTES = POOL_WORDS * sizeof(u32), - POOL_BITS = POOL_BYTES * 8, + POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_BITSHIFT = ilog2(POOL_BITS), /* To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. */ POOL_ENTROPY_SHIFT = 3, #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, - - /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - POOL_TAP1 = 104, - POOL_TAP2 = 76, - POOL_TAP3 = 51, - POOL_TAP4 = 25, - POOL_TAP5 = 1, - - EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2 + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT }; /* @@ -438,6 +302,12 @@ enum poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; +/* + * If the entropy count falls under this number of bits, then we + * should wake up processes which are selecting or polling on write + * access to /dev/random. + */ +static int random_write_wakeup_bits = POOL_BITS * 3 / 4; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -493,73 +363,31 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -static u32 input_pool_data[POOL_WORDS] __latent_entropy; - static struct { + struct blake2s_state hash; spinlock_t lock; - u16 add_ptr; - u16 input_rotate; int entropy_count; } input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static ssize_t extract_entropy(void *buf, size_t nbytes, int min); -static ssize_t _extract_entropy(void *buf, size_t nbytes); +static bool extract_entropy(void *buf, size_t nbytes, int min); +static void _extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); -static const u32 twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; - /* * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. - * - * The pool is stirred with a primitive polynomial of the appropriate - * degree, and then twisted. We twist by three bits at a time because - * it's cheap to do so and helps slightly in the expected case where - * the entropy is concentrated in the low-order bits. */ static void _mix_pool_bytes(const void *in, int nbytes) { - unsigned long i; - int input_rotate; - const u8 *bytes = in; - u32 w; - - input_rotate = input_pool.input_rotate; - i = input_pool.add_ptr; - - /* mix one byte at a time to simplify size handling and churn faster */ - while (nbytes--) { - w = rol32(*bytes++, input_rotate); - i = (i - 1) & POOL_WORDMASK; - - /* XOR in the various taps */ - w ^= input_pool_data[i]; - w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK]; - - /* Mix the result back in with a twist */ - input_pool_data[i] = (w >> 3) ^ twist_table[w & 7]; - - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. - */ - input_rotate = (input_rotate + (i ? 7 : 14)) & 31; - } - - input_pool.input_rotate = input_rotate; - input_pool.add_ptr = i; + blake2s_update(&input_pool.hash, in, nbytes); } static void __mix_pool_bytes(const void *in, int nbytes) @@ -953,15 +781,14 @@ static int crng_slow_load(const u8 *cp, size_t len) static void crng_reseed(struct crng_state *crng, bool use_input_pool) { unsigned long flags; - int i, num; + int i; union { u8 block[CHACHA_BLOCK_SIZE]; u32 key[8]; } buf; if (use_input_pool) { - num = extract_entropy(&buf, 32, 16); - if (num == 0) + if (!extract_entropy(&buf, 32, 16)) return; } else { _extract_crng(&primary_crng, buf.block); @@ -1329,74 +1156,48 @@ retry: } /* - * This function does the actual extraction for extract_entropy. - * - * Note: we assume that .poolwords is a multiple of 16 words. + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. */ -static void extract_buf(u8 *out) +static void _extract_entropy(void *buf, size_t nbytes) { - struct blake2s_state state __aligned(__alignof__(unsigned long)); - u8 hash[BLAKE2S_HASH_SIZE]; - unsigned long *salt; unsigned long flags; - - blake2s_init(&state, sizeof(hash)); - - /* - * If we have an architectural hardware random number - * generator, use it for BLAKE2's salt & personal fields. - */ - for (salt = (unsigned long *)&state.h[4]; - salt < (unsigned long *)&state.h[8]; ++salt) { - unsigned long v; - if (!arch_get_random_long(&v)) - break; - *salt ^= v; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdrand[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { + if (!arch_get_random_long(&block.rdrand[i])) + block.rdrand[i] = random_get_entropy(); } - /* Generate a hash across the pool */ spin_lock_irqsave(&input_pool.lock, flags); - blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES); - blake2s_final(&state, hash); /* final zeros out state */ - /* - * We mix the hash back into the pool to prevent backtracking - * attacks (where the attacker knows the state of the pool - * plus the current outputs, and attempts to find previous - * outputs), unless the hash function can be inverted. By - * mixing at least a hash worth of hash data back, we make - * brute-forcing the feedback as hard as brute-forcing the - * hash. - */ - __mix_pool_bytes(hash, sizeof(hash)); - spin_unlock_irqrestore(&input_pool.lock, flags); + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); - /* Note that EXTRACT_SIZE is half of hash size here, because above - * we've dumped the full length back into mixer. By reducing the - * amount that we emit, we retain a level of forward secrecy. - */ - memcpy(out, hash, EXTRACT_SIZE); - memzero_explicit(hash, sizeof(hash)); -} + /* next_key = HASHPRF(seed, RDRAND || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); -static ssize_t _extract_entropy(void *buf, size_t nbytes) -{ - ssize_t ret = 0, i; - u8 tmp[EXTRACT_SIZE]; + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); while (nbytes) { - extract_buf(tmp); - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); + i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDRAND || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); nbytes -= i; buf += i; - ret += i; } - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); } /* @@ -1404,13 +1205,18 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes) * returns it in a buffer. * * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding. + * failing to avoid races that defeat catastrophic reseeding. If we + * have less than min entropy available, we return false and buf is + * not filled. */ -static ssize_t extract_entropy(void *buf, size_t nbytes, int min) +static bool extract_entropy(void *buf, size_t nbytes, int min) { trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); - nbytes = account(nbytes, min); - return _extract_entropy(buf, nbytes); + if (account(nbytes, min)) { + _extract_entropy(buf, nbytes); + return true; + } + return false; } #define warn_unseeded_randomness(previous) \ @@ -1674,7 +1480,7 @@ static void __init init_std_data(void) unsigned long rv; mix_pool_bytes(&now, sizeof(now)); - for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { + for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); -- GitLab From a6beb8155b9c28e79cc3af8b1cc3944b62616fd9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 2 Feb 2022 13:30:03 +0100 Subject: [PATCH 0173/4335] random: simplify entropy debiting commit 9c07f57869e90140080cfc282cc628d123e27704 upstream. Our pool is 256 bits, and we only ever use all of it or don't use it at all, which is decided by whether or not it has at least 128 bits in it. So we can drastically simplify the accounting and cmpxchg loop to do exactly this. While we're at it, we move the minimum bit size into a constant so it can be shared between the two places where it matters. The reason we want any of this is for the case in which an attacker has compromised the current state, and then bruteforces small amounts of entropy added to it. By demanding a particular minimum amount of entropy be present before reseeding, we make that bruteforcing difficult. Note that this rationale no longer includes anything about /dev/random blocking at the right moment, since /dev/random no longer blocks (except for at ~boot), but rather uses the crng. In a former life, /dev/random was different and therefore required a more nuanced account(), but this is no longer. Behaviorally, nothing changes here. This is just a simplification of the code. Cc: Theodore Ts'o Cc: Greg Kroah-Hartman Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 91 ++++++++--------------------------- include/trace/events/random.h | 30 +++--------- 2 files changed, 27 insertions(+), 94 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b1d6a9e8bf7ee..4e1fd614a2148 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -289,12 +289,14 @@ enum poolinfo { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_BITSHIFT = ilog2(POOL_BITS), + POOL_MIN_BITS = POOL_BITS / 2, /* To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. */ POOL_ENTROPY_SHIFT = 3, #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, + POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT }; /* @@ -375,8 +377,7 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static bool extract_entropy(void *buf, size_t nbytes, int min); -static void _extract_entropy(void *buf, size_t nbytes); +static void extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); @@ -467,7 +468,7 @@ static void process_random_ready_list(void) */ static void credit_entropy_bits(int nbits) { - int entropy_count, entropy_bits, orig; + int entropy_count, orig; int nfrac = nbits << POOL_ENTROPY_SHIFT; /* Ensure that the multiplication can avoid being 64 bits wide. */ @@ -527,8 +528,7 @@ retry: trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); - entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT; - if (crng_init < 2 && entropy_bits >= 128) + if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS) crng_reseed(&primary_crng, true); } @@ -618,7 +618,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(void) { - _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -788,8 +788,17 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) } buf; if (use_input_pool) { - if (!extract_entropy(&buf, 32, 16)) - return; + int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_FRACBITS) + return; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf.key, sizeof(buf.key)); + if (random_write_wakeup_bits) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, @@ -1114,52 +1123,11 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * *********************************************************************/ -/* - * This function decides how many bytes to actually take from the - * given pool, and also debits the entropy count accordingly. - */ -static size_t account(size_t nbytes, int min) -{ - int entropy_count, orig; - size_t ibytes, nfrac; - - BUG_ON(input_pool.entropy_count > POOL_FRACBITS); - - /* Can we pull enough? */ -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: count %d\n", entropy_count); - entropy_count = 0; - } - - /* never pull more than available */ - ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3)); - if (ibytes < min) - ibytes = 0; - nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); - if ((size_t)entropy_count > nfrac) - entropy_count -= nfrac; - else - entropy_count = 0; - - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_debit_entropy(8 * ibytes); - if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - - return ibytes; -} - /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ -static void _extract_entropy(void *buf, size_t nbytes) +static void extract_entropy(void *buf, size_t nbytes) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; @@ -1169,6 +1137,8 @@ static void _extract_entropy(void *buf, size_t nbytes) } block; size_t i; + trace_extract_entropy(nbytes, POOL_ENTROPY_BITS()); + for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { if (!arch_get_random_long(&block.rdrand[i])) block.rdrand[i] = random_get_entropy(); @@ -1200,25 +1170,6 @@ static void _extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a buffer. - * - * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding. If we - * have less than min entropy available, we return false and buf is - * not filled. - */ -static bool extract_entropy(void *buf, size_t nbytes, int min) -{ - trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); - if (account(nbytes, min)) { - _extract_entropy(buf, nbytes); - return true; - } - return false; -} - #define warn_unseeded_randomness(previous) \ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) diff --git a/include/trace/events/random.h b/include/trace/events/random.h index a2d9aa16a5d7a..ad149aeaf42c5 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -79,22 +79,6 @@ TRACE_EVENT(credit_entropy_bits, __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); -TRACE_EVENT(debit_entropy, - TP_PROTO(int debit_bits), - - TP_ARGS( debit_bits), - - TP_STRUCT__entry( - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->debit_bits = debit_bits; - ), - - TP_printk("input pool: debit_bits %d", __entry->debit_bits) -); - TRACE_EVENT(add_input_randomness, TP_PROTO(int input_bits), @@ -161,31 +145,29 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( __field( int, nbytes ) __field( int, entropy_count ) - __field(unsigned long, IP ) ), TP_fast_assign( __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; - __entry->IP = IP; ), - TP_printk("input pool: nbytes %d entropy_count %d caller %pS", - __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d", + __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, -- GitLab From cb65ac3008163fb4a3dd8a9d8342f03e43123b27 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 3 Feb 2022 13:28:06 +0100 Subject: [PATCH 0174/4335] random: use linear min-entropy accumulation crediting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c570449094844527577c5c914140222cb1893e3f upstream. 30e37ec516ae ("random: account for entropy loss due to overwrites") assumed that adding new entropy to the LFSR pool probabilistically cancelled out old entropy there, so entropy was credited asymptotically, approximating Shannon entropy of independent sources (rather than a stronger min-entropy notion) using 1/8th fractional bits and replacing a constant 2-2/√𝑒 term (~0.786938) with 3/4 (0.75) to slightly underestimate it. This wasn't superb, but it was perhaps better than nothing, so that's what was done. Which entropy specifically was being cancelled out and how much precisely each time is hard to tell, though as I showed with the attack code in my previous commit, a motivated adversary with sufficient information can actually cancel out everything. Since we're no longer using an LFSR for entropy accumulation, this probabilistic cancellation is no longer relevant. Rather, we're now using a computational hash function as the accumulator and we've switched to working in the random oracle model, from which we can now revisit the question of min-entropy accumulation, which is done in detail in . Consider a long input bit string that is built by concatenating various smaller independent input bit strings. Each one of these inputs has a designated min-entropy, which is what we're passing to credit_entropy_bits(h). When we pass the concatenation of these to a random oracle, it means that an adversary trying to receive back the same reply as us would need to become certain about each part of the concatenated bit string we passed in, which means becoming certain about all of those h values. That means we can estimate the accumulation by simply adding up the h values in calls to credit_entropy_bits(h); there's no probabilistic cancellation at play like there was said to be for the LFSR. Incidentally, this is also what other entropy accumulators based on computational hash functions do as well. So this commit replaces credit_entropy_bits(h) with essentially `total = min(POOL_BITS, total + h)`, done with a cmpxchg loop as before. What if we're wrong and the above is nonsense? It's not, but let's assume we don't want the actual _behavior_ of the code to change much. Currently that behavior is not extracting from the input pool until it has 128 bits of entropy in it. With the old algorithm, we'd hit that magic 128 number after roughly 256 calls to credit_entropy_bits(1). So, we can retain more or less the old behavior by waiting to extract from the input pool until it hits 256 bits of entropy using the new code. For people concerned about this change, it means that there's not that much practical behavioral change. And for folks actually trying to model the behavior rigorously, it means that we have an even higher margin against attacks. Cc: Theodore Ts'o Cc: Dominik Brodowski Cc: Greg Kroah-Hartman Reviewed-by: Eric Biggers Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 114 ++++++++---------------------------------- 1 file changed, 20 insertions(+), 94 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4e1fd614a2148..f678620a25f8b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -286,17 +286,9 @@ /* #define ADD_INTERRUPT_BENCH */ -enum poolinfo { +enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_BITSHIFT = ilog2(POOL_BITS), - POOL_MIN_BITS = POOL_BITS / 2, - - /* To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. */ - POOL_ENTROPY_SHIFT = 3, -#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, - POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ }; /* @@ -309,7 +301,7 @@ static struct fasync_struct *fasync; * should wake up processes which are selecting or polling on write * access to /dev/random. */ -static int random_write_wakeup_bits = POOL_BITS * 3 / 4; +static int random_write_wakeup_bits = POOL_MIN_BITS; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -469,66 +461,18 @@ static void process_random_ready_list(void) static void credit_entropy_bits(int nbits) { int entropy_count, orig; - int nfrac = nbits << POOL_ENTROPY_SHIFT; - - /* Ensure that the multiplication can avoid being 64 bits wide. */ - BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31); if (!nbits) return; -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (nfrac < 0) { - /* Debit */ - entropy_count += nfrac; - } else { - /* - * Credit: we have to account for the possibility of - * overwriting already present entropy. Even in the - * ideal case of pure Shannon entropy, new contributions - * approach the full value asymptotically: - * - * entropy <- entropy + (pool_size - entropy) * - * (1 - exp(-add_entropy/pool_size)) - * - * For add_entropy <= pool_size/2 then - * (1 - exp(-add_entropy/pool_size)) >= - * (add_entropy/pool_size)*0.7869... - * so we can approximate the exponential with - * 3/4*add_entropy/pool_size and still be on the - * safe side by adding at most pool_size/2 at a time. - * - * The use of pool_size-2 in the while statement is to - * prevent rounding artifacts from making the loop - * arbitrarily long; this limits the loop to log2(pool_size)*2 - * turns no matter how large nbits is. - */ - int pnfrac = nfrac; - const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2; - /* The +2 corresponds to the /4 in the denominator */ - - do { - unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2); - unsigned int add = - ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s; - - entropy_count += add; - pnfrac -= anfrac; - } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac)); - } - - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: count %d\n", entropy_count); - entropy_count = 0; - } else if (entropy_count > POOL_FRACBITS) - entropy_count = POOL_FRACBITS; - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min(POOL_BITS, orig + nbits); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); - if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS) + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) crng_reseed(&primary_crng, true); } @@ -791,7 +735,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) int entropy_count; do { entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_FRACBITS) + if (entropy_count < POOL_MIN_BITS) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); extract_entropy(buf.key, sizeof(buf.key)); @@ -1014,7 +958,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(POOL_ENTROPY_BITS()); + trace_add_input_randomness(input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1112,7 +1056,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS()); + trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1137,7 +1081,7 @@ static void extract_entropy(void *buf, size_t nbytes) } block; size_t i; - trace_extract_entropy(nbytes, POOL_ENTROPY_BITS()); + trace_extract_entropy(nbytes, input_pool.entropy_count); for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { if (!arch_get_random_long(&block.rdrand[i])) @@ -1486,9 +1430,9 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, { int ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3)); + nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS()); + trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); return ret; } @@ -1527,7 +1471,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (POOL_ENTROPY_BITS() < random_write_wakeup_bits) + if (input_pool.entropy_count < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1582,8 +1526,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = POOL_ENTROPY_BITS(); - if (put_user(ent_count, p)) + if (put_user(input_pool.entropy_count, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1734,23 +1677,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } -/* - * Return entropy available scaled to integral bits - */ -static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - int entropy_count; - - entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT; - - fake_table.data = &entropy_count; - fake_table.maxlen = sizeof(entropy_count); - - return proc_dointvec(&fake_table, write, buffer, lenp, ppos); -} - static int sysctl_poolsize = POOL_BITS; extern struct ctl_table random_table[]; struct ctl_table random_table[] = { @@ -1763,10 +1689,10 @@ struct ctl_table random_table[] = { }, { .procname = "entropy_avail", + .data = &input_pool.entropy_count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_do_entropy, - .data = &input_pool.entropy_count, + .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", @@ -1962,7 +1888,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - POOL_ENTROPY_BITS() <= random_write_wakeup_bits, + input_pool.entropy_count <= random_write_wakeup_bits, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); -- GitLab From 3730490111ca0867658bc6b7393882b30daadda6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 5 Feb 2022 14:00:58 +0100 Subject: [PATCH 0175/4335] random: always wake up entropy writers after extraction commit 489c7fc44b5740d377e8cfdbf0851036e493af00 upstream. Now that POOL_BITS == POOL_MIN_BITS, we must unconditionally wake up entropy writers after every extraction. Therefore there's no point of write_wakeup_threshold, so we can move it to the dustbin of unused compatibility sysctls. While we're at it, we can fix a small comparison where we were waking up after <= min rather than < min. Cc: Theodore Ts'o Suggested-by: Eric Biggers Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 7 +++-- drivers/char/random.c | 33 +++++++-------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d6977875c1b76..e568f32bc3062 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1019,14 +1019,17 @@ This is a directory, with the following entries: * ``poolsize``: the entropy pool size, in bits; * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum - number of seconds between urandom pool reseeding). + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior. * ``uuid``: a UUID generated every time this is retrieved (this can thus be used to generate UUIDs at will); * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` - are woken up. + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` defined, these additional entries are present: diff --git a/drivers/char/random.c b/drivers/char/random.c index f678620a25f8b..6e8176f4a4480 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -296,12 +296,6 @@ enum { */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. - */ -static int random_write_wakeup_bits = POOL_MIN_BITS; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -739,10 +733,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); extract_entropy(buf.key, sizeof(buf.key)); - if (random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, @@ -1471,7 +1463,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (input_pool.entropy_count < random_write_wakeup_bits) + if (input_pool.entropy_count < POOL_MIN_BITS) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1556,7 +1548,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { + if (xchg(&input_pool.entropy_count, 0)) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1636,9 +1628,9 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, #include -static int min_write_thresh; -static int max_write_thresh = POOL_BITS; static int random_min_urandom_seed = 60; +static int random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_poolsize = POOL_BITS; static char sysctl_bootid[16]; /* @@ -1677,7 +1669,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } -static int sysctl_poolsize = POOL_BITS; extern struct ctl_table random_table[]; struct ctl_table random_table[] = { { @@ -1699,9 +1690,7 @@ struct ctl_table random_table[] = { .data = &random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_write_thresh, - .extra2 = &max_write_thresh, + .proc_handler = proc_dointvec, }, { .procname = "urandom_min_reseed_secs", @@ -1882,13 +1871,13 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, } /* Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * when the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has lapsed. + * We'll be woken up again once below POOL_MIN_BITS, when + * the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has elapsed. */ wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - input_pool.entropy_count <= random_write_wakeup_bits, + input_pool.entropy_count < POOL_MIN_BITS, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); -- GitLab From 1bc9db59b24c14fc73d1a1d8288064edd5e0ac10 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Feb 2022 01:45:53 +0100 Subject: [PATCH 0176/4335] random: make credit_entropy_bits() always safe commit a49c010e61e1938be851f5e49ac219d49b704103 upstream. This is called from various hwgenerator drivers, so rather than having one "safe" version for userspace and one "unsafe" version for the kernel, just make everything safe; the checks are cheap and sensible to have anyway. Reported-by: Sultan Alsawaf Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6e8176f4a4480..d5a2714a8126d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -447,18 +447,15 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_list_lock, flags); } -/* - * Credit (or debit) the entropy store with n bits of entropy. - * Use credit_entropy_bits_safe() if the value comes from userspace - * or otherwise should be checked for extreme values. - */ static void credit_entropy_bits(int nbits) { int entropy_count, orig; - if (!nbits) + if (nbits <= 0) return; + nbits = min(nbits, POOL_BITS); + do { orig = READ_ONCE(input_pool.entropy_count); entropy_count = min(POOL_BITS, orig + nbits); @@ -470,18 +467,6 @@ static void credit_entropy_bits(int nbits) crng_reseed(&primary_crng, true); } -static int credit_entropy_bits_safe(int nbits) -{ - if (nbits < 0) - return -EINVAL; - - /* Cap the value to avoid overflows */ - nbits = min(nbits, POOL_BITS); - - credit_entropy_bits(nbits); - return 0; -} - /********************************************************************* * * CRNG using CHACHA20 @@ -1526,7 +1511,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - return credit_entropy_bits_safe(ent_count); + if (ent_count < 0) + return -EINVAL; + credit_entropy_bits(ent_count); + return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1539,7 +1527,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) retval = write_pool((const char __user *)p, size); if (retval < 0) return retval; - return credit_entropy_bits_safe(ent_count); + credit_entropy_bits(ent_count); + return 0; case RNDZAPENTCNT: case RNDCLEARPOOL: /* -- GitLab From ad86aa8ec90fcc4ce0c2bfdb629b7d2fa3d611a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Feb 2022 14:17:33 -0800 Subject: [PATCH 0177/4335] random: remove use_input_pool parameter from crng_reseed() commit 5d58ea3a31cc98b9fa563f6921d3d043bf0103d1 upstream. The primary_crng is always reseeded from the input_pool, while the NUMA crngs are always reseeded from the primary_crng. Remove the redundant 'use_input_pool' parameter from crng_reseed() and just directly check whether the crng is the primary_crng. Signed-off-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d5a2714a8126d..8dc1e22bb241e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -365,7 +365,7 @@ static struct { static void extract_entropy(void *buf, size_t nbytes); -static void crng_reseed(struct crng_state *crng, bool use_input_pool); +static void crng_reseed(struct crng_state *crng); /* * This function adds bytes into the entropy "pool". It does not @@ -464,7 +464,7 @@ static void credit_entropy_bits(int nbits) trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(&primary_crng, true); + crng_reseed(&primary_crng); } /********************************************************************* @@ -701,7 +701,7 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng, bool use_input_pool) +static void crng_reseed(struct crng_state *crng) { unsigned long flags; int i; @@ -710,7 +710,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) u32 key[8]; } buf; - if (use_input_pool) { + if (crng == &primary_crng) { int entropy_count; do { entropy_count = READ_ONCE(input_pool.entropy_count); @@ -748,7 +748,7 @@ static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) init_time = READ_ONCE(crng->init_time); if (time_after(READ_ONCE(crng_global_init_time), init_time) || time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng); + crng_reseed(crng); } spin_lock_irqsave(&crng->lock, flags); chacha20_block(&crng->state[0], out); @@ -1547,7 +1547,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng, true); + crng_reseed(&primary_crng); WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: -- GitLab From 67fdda9af773b124de1609d946da4a84507898dc Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 28 Jan 2022 23:29:45 +0100 Subject: [PATCH 0178/4335] random: remove batched entropy locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77760fd7f7ae3dfd03668204e708d1568d75447d upstream. Rather than use spinlocks to protect batched entropy, we can instead disable interrupts locally, since we're dealing with per-cpu data, and manage resets with a basic generation counter. At the same time, we can't quite do this on PREEMPT_RT, where we still want spinlocks-as- mutexes semantics. So we use a local_lock_t, which provides the right behavior for each. Because this is a per-cpu lock, that generation counter is still doing the necessary CPU-to-CPU communication. This should improve performance a bit. It will also fix the linked splat that Jonathan received with a PROVE_RAW_LOCK_NESTING=y. Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Suggested-by: Andy Lutomirski Reported-by: Jonathan Neuschäfer Tested-by: Jonathan Neuschäfer Link: https://lore.kernel.org/lkml/YfMa0QgsjCVdRAvJ@latitude/ Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 55 ++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8dc1e22bb241e..cf42ee10633e7 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1721,13 +1721,16 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ +static atomic_t batch_generation = ATOMIC_INIT(0); + struct batched_entropy { union { u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; + local_lock_t lock; unsigned int position; - spinlock_t batch_lock; + int generation; }; /* @@ -1739,7 +1742,7 @@ struct batched_entropy { * point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock) }; u64 get_random_u64(void) @@ -1748,67 +1751,65 @@ u64 get_random_u64(void) unsigned long flags; struct batched_entropy *batch; static void *previous; + int next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + + next_gen = atomic_read(&batch_generation); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 || + next_gen != batch->generation) { extract_crng((u8 *)batch->entropy_u64); batch->position = 0; + batch->generation = next_gen; } + ret = batch->entropy_u64[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + local_unlock_irqrestore(&batched_entropy_u64.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock) }; + u32 get_random_u32(void) { u32 ret; unsigned long flags; struct batched_entropy *batch; static void *previous; + int next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + + next_gen = atomic_read(&batch_generation); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 || + next_gen != batch->generation) { extract_crng((u8 *)batch->entropy_u32); batch->position = 0; + batch->generation = next_gen; } + ret = batch->entropy_u32[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + local_unlock_irqrestore(&batched_entropy_u32.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u32); /* It's important to invalidate all potential batched entropy that might * be stored before the crng is initialized, which we can do lazily by - * simply resetting the counter to zero so that it's re-extracted on the - * next usage. */ + * bumping the generation counter. + */ static void invalidate_batched_entropy(void) { - int cpu; - unsigned long flags; - - for_each_possible_cpu(cpu) { - struct batched_entropy *batched_entropy; - - batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); - spin_lock_irqsave(&batched_entropy->batch_lock, flags); - batched_entropy->position = 0; - spin_unlock(&batched_entropy->batch_lock); - - batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); - spin_lock(&batched_entropy->batch_lock); - batched_entropy->position = 0; - spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); - } + atomic_inc(&batch_generation); } /** -- GitLab From 704c76e08804c496fca35d627215ad1011a20aa8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 5 Feb 2022 11:34:57 +0100 Subject: [PATCH 0179/4335] random: fix locking in crng_fast_load() commit 7c2fe2b32bf76441ff5b7a425b384e5f75aa530a upstream. crng_init is protected by primary_crng->lock, so keep holding that lock when incrementing crng_init from 0 to 1 in crng_fast_load(). The call to pr_notice() can wait until the lock is released; this code path cannot be reached twice, as crng_fast_load() aborts early if crng_init > 0. Signed-off-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index cf42ee10633e7..317864de62c86 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -647,12 +647,13 @@ static size_t crng_fast_load(const u8 *cp, size_t len) p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; } - spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; - pr_notice("fast init done\n"); } + spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng_init == 1) + pr_notice("fast init done\n"); return ret; } -- GitLab From ded4cc806d0d9b4a552e8d8fb398155e6e66d9c0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 12:18:33 +0100 Subject: [PATCH 0180/4335] random: use RDSEED instead of RDRAND in entropy extraction commit 28f425e573e906a4c15f8392cc2b1561ef448595 upstream. When /dev/random was directly connected with entropy extraction, without any expansion stage, extract_buf() was called for every 10 bytes of data read from /dev/random. For that reason, RDRAND was used rather than RDSEED. At the same time, crng_reseed() was still only called every 5 minutes, so there RDSEED made sense. Those olden days were also a time when the entropy collector did not use a cryptographic hash function, which meant most bets were off in terms of real preimage resistance. For that reason too it didn't matter _that_ much whether RDSEED was mixed in before or after entropy extraction; both choices were sort of bad. But now we have a cryptographic hash function at work, and with that we get real preimage resistance. We also now only call extract_entropy() every 5 minutes, rather than every 10 bytes. This allows us to do two important things. First, we can switch to using RDSEED in extract_entropy(), as Dominik suggested. Second, we can ensure that RDSEED input always goes into the cryptographic hash function with other things before being used directly. This eliminates a category of attacks in which the CPU knows the current state of the crng and knows that we're going to xor RDSEED into it, and so it computes a malicious RDSEED. By going through our hash function, it would require the CPU to compute a preimage on the fly, which isn't going to happen. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Suggested-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 317864de62c86..39132383adb8a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -727,13 +727,8 @@ static void crng_reseed(struct crng_state *crng) CHACHA_KEY_SIZE); } spin_lock_irqsave(&crng->lock, flags); - for (i = 0; i < 8; i++) { - unsigned long rv; - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - crng->state[i + 4] ^= buf.key[i] ^ rv; - } + for (i = 0; i < 8; i++) + crng->state[i + 4] ^= buf.key[i]; memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); @@ -1054,16 +1049,17 @@ static void extract_entropy(void *buf, size_t nbytes) unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; struct { - unsigned long rdrand[32 / sizeof(long)]; + unsigned long rdseed[32 / sizeof(long)]; size_t counter; } block; size_t i; trace_extract_entropy(nbytes, input_pool.entropy_count); - for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { - if (!arch_get_random_long(&block.rdrand[i])) - block.rdrand[i] = random_get_entropy(); + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); } spin_lock_irqsave(&input_pool.lock, flags); @@ -1071,7 +1067,7 @@ static void extract_entropy(void *buf, size_t nbytes) /* seed = HASHPRF(last_key, entropy_input) */ blake2s_final(&input_pool.hash, seed); - /* next_key = HASHPRF(seed, RDRAND || 0) */ + /* next_key = HASHPRF(seed, RDSEED || 0) */ block.counter = 0; blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); @@ -1081,7 +1077,7 @@ static void extract_entropy(void *buf, size_t nbytes) while (nbytes) { i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); - /* output = HASHPRF(seed, RDRAND || ++counter) */ + /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); nbytes -= i; -- GitLab From 5e4fab6fefb11feb1b18e60500634ecc46f398f3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 6 Feb 2022 23:51:41 +0100 Subject: [PATCH 0181/4335] random: get rid of secondary crngs commit a9412d510ab9a9ba411fea612903631d2e1f1601 upstream. As the comment said, this is indeed a "hack". Since it was introduced, it's been a constant state machine nightmare, with lots of subtle early boot issues and a wildly complex set of machinery to keep everything in sync. Rather than continuing to play whack-a-mole with this approach, this commit simply removes it entirely. This commit is preparation for "random: use simpler fast key erasure flow on per-cpu keys" in this series, which introduces a simpler (and faster) mechanism to accomplish the same thing. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 225 ++++++++++-------------------------------- 1 file changed, 53 insertions(+), 172 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 39132383adb8a..a1c1b5b0b0a85 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -323,14 +323,11 @@ static struct crng_state primary_crng = { * its value (from 0->1->2). */ static int crng_init = 0; -static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; -static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used); +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]); +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -365,7 +362,7 @@ static struct { static void extract_entropy(void *buf, size_t nbytes); -static void crng_reseed(struct crng_state *crng); +static void crng_reseed(void); /* * This function adds bytes into the entropy "pool". It does not @@ -464,7 +461,7 @@ static void credit_entropy_bits(int nbits) trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(&primary_crng); + crng_reseed(); } /********************************************************************* @@ -477,16 +474,7 @@ static void credit_entropy_bits(int nbits) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -/* - * Hack to deal with crazy userspace progams when they are all trying - * to access /dev/urandom in parallel. The programs are almost - * certainly doing something terribly wrong, but we'll work around - * their brain damage. - */ -static struct crng_state **crng_node_pool __read_mostly; - static void invalidate_batched_entropy(void); -static void numa_crng_init(void); static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); static int __init parse_trust_cpu(char *arg) @@ -495,24 +483,6 @@ static int __init parse_trust_cpu(char *arg) } early_param("random.trust_cpu", parse_trust_cpu); -static bool crng_init_try_arch(struct crng_state *crng) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - crng->state[i] ^= rv; - } - - return arch_init; -} - static bool __init crng_init_try_arch_early(void) { int i; @@ -531,100 +501,17 @@ static bool __init crng_init_try_arch_early(void) return arch_init; } -static void crng_initialize_secondary(struct crng_state *crng) -{ - chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(u32) * 12); - crng_init_try_arch(crng); - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - -static void __init crng_initialize_primary(void) +static void __init crng_initialize(void) { extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); - numa_crng_init(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void crng_finalize_init(void) -{ - if (!system_wq) { - /* We can't call numa_crng_init until we have workqueues, - * so mark this for processing later. */ - crng_need_final_init = true; - return; - } - - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - crng_need_final_init = false; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } -} - -static void do_numa_crng_init(struct work_struct *work) -{ - int i; - struct crng_state *crng; - struct crng_state **pool; - - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize_secondary(crng); - pool[i] = crng; - } - /* pairs with READ_ONCE() in select_crng() */ - if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { - for_each_node(i) - kfree(pool[i]); - kfree(pool); - } -} - -static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); - -static void numa_crng_init(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) - schedule_work(&numa_crng_init_work); -} - -static struct crng_state *select_crng(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) { - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; - } - - return &primary_crng; -} - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -702,68 +589,71 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng) +static void crng_reseed(void) { unsigned long flags; - int i; + int i, entropy_count; union { u8 block[CHACHA_BLOCK_SIZE]; u32 key[8]; } buf; - if (crng == &primary_crng) { - int entropy_count; - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf.key, sizeof(buf.key)); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } else { - _extract_crng(&primary_crng, buf.block); - _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA_KEY_SIZE); - } - spin_lock_irqsave(&crng->lock, flags); + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf.key, sizeof(buf.key)); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + + spin_lock_irqsave(&primary_crng.lock, flags); for (i = 0; i < 8; i++) - crng->state[i + 4] ^= buf.key[i]; + primary_crng.state[i + 4] ^= buf.key[i]; memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(crng->init_time, jiffies); - spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) - crng_finalize_init(); + WRITE_ONCE(primary_crng.init_time, jiffies); + spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng_init < 2) { + invalidate_batched_entropy(); + crng_init = 2; + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + if (urandom_warning.missed) { + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } + } } -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; if (crng_ready()) { - init_time = READ_ONCE(crng->init_time); - if (time_after(READ_ONCE(crng_global_init_time), init_time) || - time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng); + init_time = READ_ONCE(primary_crng.init_time); + if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(); } - spin_lock_irqsave(&crng->lock, flags); - chacha20_block(&crng->state[0], out); - if (crng->state[12] == 0) - crng->state[13]++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) -{ - _extract_crng(select_crng(), out); + spin_lock_irqsave(&primary_crng.lock, flags); + chacha20_block(&primary_crng.state[0], out); + if (primary_crng.state[12] == 0) + primary_crng.state[13]++; + spin_unlock_irqrestore(&primary_crng.lock, flags); } /* * Use the leftover bytes from the CRNG block output (if there is * enough) to mutate the CRNG key to provide backtracking protection. */ -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; u32 *s, *d; @@ -774,17 +664,12 @@ static void _crng_backtrack_protect(struct crng_state *crng, extract_crng(tmp); used = 0; } - spin_lock_irqsave(&crng->lock, flags); + spin_lock_irqsave(&primary_crng.lock, flags); s = (u32 *)&tmp[used]; - d = &crng->state[4]; + d = &primary_crng.state[4]; for (i = 0; i < 8; i++) *d++ ^= *s++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - _crng_backtrack_protect(select_crng(), tmp, used); + spin_unlock_irqrestore(&primary_crng.lock, flags); } static ssize_t extract_crng_user(void __user *buf, size_t nbytes) @@ -1371,10 +1256,7 @@ static void __init init_std_data(void) int __init rand_initialize(void) { init_std_data(); - if (crng_need_final_init) - crng_finalize_init(); - crng_initialize_primary(); - crng_global_init_time = jiffies; + crng_initialize(); if (ratelimit_disable) { urandom_warning.interval = 0; unseeded_warning.interval = 0; @@ -1544,8 +1426,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng); - WRITE_ONCE(crng_global_init_time, jiffies - 1); + crng_reseed(); return 0; default: return -EINVAL; -- GitLab From fd6eb28005898786f99be2b54c4861991fa14f48 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 12:40:14 +0100 Subject: [PATCH 0182/4335] random: inline leaves of rand_initialize() commit 8566417221fcec51346ec164e920dacb979c6b5f upstream. This is a preparatory commit for the following one. We simply inline the various functions that rand_initialize() calls that have no other callers. The compiler was doing this anyway before. Doing this will allow us to reorganize this after. We can then move the trust_cpu and parse_trust_cpu definitions a bit closer to where they're actually used, which makes the code easier to read. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 90 ++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 57 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a1c1b5b0b0a85..c7b3256c543b1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -476,42 +476,6 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static void invalidate_batched_entropy(void); -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -static bool __init crng_init_try_arch_early(void) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - primary_crng.state[i] ^= rv; - } - - return arch_init; -} - -static void __init crng_initialize(void) -{ - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); - if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { - invalidate_batched_entropy(); - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } - primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -1220,17 +1184,28 @@ int __must_check get_random_bytes_arch(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes_arch); +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); + /* - * init_std_data - initialize pool with system data - * - * This function clears the pool's entropy count and mixes some system - * data into the pool to prepare it for use. The pool is not cleared - * as that can only decrease the entropy in the pool. + * Note that setup_arch() may call add_device_randomness() + * long before we get here. This allows seeding of the pools + * with some platform dependent data very early in the boot + * process. But it limits our options here. We must use + * statically allocated structures that already have all + * initializations complete at compile time. We should also + * take care not to overwrite the precious per platform data + * we were given. */ -static void __init init_std_data(void) +int __init rand_initialize(void) { int i; ktime_t now = ktime_get_real(); + bool arch_init = true; unsigned long rv; mix_pool_bytes(&now, sizeof(now)); @@ -1241,22 +1216,23 @@ static void __init init_std_data(void) mix_pool_bytes(&rv, sizeof(rv)); } mix_pool_bytes(utsname(), sizeof(*(utsname()))); -} -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. - */ -int __init rand_initialize(void) -{ - init_std_data(); - crng_initialize(); + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + for (i = 4; i < 16; i++) { + if (!arch_get_random_seed_long_early(&rv) && + !arch_get_random_long_early(&rv)) { + rv = random_get_entropy(); + arch_init = false; + } + primary_crng.state[i] ^= rv; + } + if (arch_init && trust_cpu && crng_init < 2) { + invalidate_batched_entropy(); + crng_init = 2; + pr_notice("crng init done (trusting CPU's manufacturer)\n"); + } + primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; + if (ratelimit_disable) { urandom_warning.interval = 0; unseeded_warning.interval = 0; -- GitLab From b51caadb18a7b42ad7517a290fa876e130a0d10f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 12:44:28 +0100 Subject: [PATCH 0183/4335] random: ensure early RDSEED goes through mixer on init commit a02cf3d0dd77244fd5333ac48d78871de459ae6d upstream. Continuing the reasoning of "random: use RDSEED instead of RDRAND in entropy extraction" from this series, at init time we also don't want to be xoring RDSEED directly into the crng. Instead it's safer to put it into our entropy collector and then re-extract it, so that it goes through a hash function with preimage resistance. As a matter of hygiene, we also order these now so that the RDSEED byte are hashed in first, followed by the bytes that are likely more predictable (e.g. utsname()). Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c7b3256c543b1..092b31354cf3d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1208,24 +1208,18 @@ int __init rand_initialize(void) bool arch_init = true; unsigned long rv; - mix_pool_bytes(&now, sizeof(now)); for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - mix_pool_bytes(&rv, sizeof(rv)); - } - mix_pool_bytes(utsname(), sizeof(*(utsname()))); - - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); - for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { rv = random_get_entropy(); arch_init = false; } - primary_crng.state[i] ^= rv; + mix_pool_bytes(&rv, sizeof(rv)); } + mix_pool_bytes(&now, sizeof(now)); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); + + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (arch_init && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; -- GitLab From 4a18db164cbbca1798720eff3ef29b8b46148314 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 13:00:11 +0100 Subject: [PATCH 0184/4335] random: do not xor RDRAND when writing into /dev/random commit 91c2afca290ed3034841c8c8532e69ed9e16cf34 upstream. Continuing the reasoning of "random: ensure early RDSEED goes through mixer on init", we don't want RDRAND interacting with anything without going through the mixer function, as a backdoored CPU could presumably cancel out data during an xor, which it'd have a harder time doing when being forced through a cryptographic hash function. There's actually no need at all to be calling RDRAND in write_pool(), because before we extract from the pool, we always do so with 32 bytes of RDSEED hashed in at that stage. Xoring at this stage is needless and introduces a minor liability. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 092b31354cf3d..3f6510e2db928 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1305,25 +1305,15 @@ static __poll_t random_poll(struct file *file, poll_table *wait) static int write_pool(const char __user *buffer, size_t count) { size_t bytes; - u32 t, buf[16]; + u8 buf[BLAKE2S_BLOCK_SIZE]; const char __user *p = buffer; while (count > 0) { - int b, i = 0; - bytes = min(count, sizeof(buf)); - if (copy_from_user(&buf, p, bytes)) + if (copy_from_user(buf, p, bytes)) return -EFAULT; - - for (b = bytes; b > 0; b -= sizeof(u32), i++) { - if (!arch_get_random_int(&t)) - break; - buf[i] ^= t; - } - count -= bytes; p += bytes; - mix_pool_bytes(buf, bytes); cond_resched(); } -- GitLab From 1aab83f9666398b7831293d7c3db8f568b21ba08 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 01:56:35 +0100 Subject: [PATCH 0185/4335] random: absorb fast pool into input pool after fast load commit c30c575db4858f0bbe5e315ff2e529c782f33a1f upstream. During crng_init == 0, we never credit entropy in add_interrupt_ randomness(), but instead dump it directly into the primary_crng. That's fine, except for the fact that we then wind up throwing away that entropy later when we switch to extracting from the input pool and xoring into (and later in this series overwriting) the primary_crng key. The two other early init sites -- add_hwgenerator_randomness()'s use crng_fast_load() and add_device_ randomness()'s use of crng_slow_load() -- always additionally give their inputs to the input pool. But not add_interrupt_randomness(). This commit fixes that shortcoming by calling mix_pool_bytes() after crng_fast_load() in add_interrupt_randomness(). That's partially verboten on PREEMPT_RT, where it implies taking spinlock_t from an IRQ handler. But this also only happens during early boot and then never again after that. Plus it's a trylock so it has the same considerations as calling crng_fast_load(), which we're already using. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Suggested-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3f6510e2db928..d3ad07bb8990d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -850,6 +850,10 @@ void add_interrupt_randomness(int irq) crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; + if (spin_trylock(&input_pool.lock)) { + _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + spin_unlock(&input_pool.lock); + } } return; } -- GitLab From b27bd09d05444a0a9e00de62fca7ff8a7b497c88 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Feb 2022 15:08:49 +0100 Subject: [PATCH 0186/4335] random: use simpler fast key erasure flow on per-cpu keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 186873c549df11b63e17062f863654e1501e1524 upstream. Rather than the clunky NUMA full ChaCha state system we had prior, this commit is closer to the original "fast key erasure RNG" proposal from , by simply treating ChaCha keys on a per-cpu basis. All entropy is extracted to a base crng key of 32 bytes. This base crng has a birthdate and a generation counter. When we go to take bytes from the crng, we first check if the birthdate is too old; if it is, we reseed per usual. Then we start working on a per-cpu crng. This per-cpu crng makes sure that it has the same generation counter as the base crng. If it doesn't, it does fast key erasure with the base crng key and uses the output as its new per-cpu key, and then updates its local generation counter. Then, using this per-cpu state, we do ordinary fast key erasure. Half of this first block is used to overwrite the per-cpu crng key for the next call -- this is the fast key erasure RNG idea -- and the other half, along with the ChaCha state, is returned to the caller. If the caller desires more than this remaining half, it can generate more ChaCha blocks, unlocked, using the now detached ChaCha state that was just returned. Crypto-wise, this is more or less what we were doing before, but this simply makes it more explicit and ensures that we always have backtrack protection by not playing games with a shared block counter. The flow looks like this: ──extract()──► base_crng.key ◄──memcpy()───┐ │ │ └──chacha()──────┬─► new_base_key └─► crngs[n].key ◄──memcpy()───┐ │ │ └──chacha()───┬─► new_key └─► random_bytes │ └────► There are a few hairy details around early init. Just as was done before, prior to having gathered enough entropy, crng_fast_load() and crng_slow_load() dump bytes directly into the base crng, and when we go to take bytes from the crng, in that case, we're doing fast key erasure with the base crng rather than the fast unlocked per-cpu crngs. This is fine as that's only the state of affairs during very early boot; once the crng initializes we never use these paths again. In the process of all this, the APIs into the crng become a bit simpler: we have get_random_bytes(buf, len) and get_random_bytes_user(buf, len), which both do what you'd expect. All of the details of fast key erasure and per-cpu selection happen only in a very short critical section of crng_make_state(), which selects the right per-cpu key, does the fast key erasure, and returns a local state to the caller's stack. So, we no longer have a need for a separate backtrack function, as this happens all at once here. The API then allows us to extend backtrack protection to batched entropy without really having to do much at all. The result is a bit simpler than before and has fewer foot guns. The init time state machine also gets a lot simpler as we don't need to wait for workqueues to come online and do deferred work. And the multi-core performance should be increased significantly, by virtue of having hardly any locking on the fast path. Cc: Theodore Ts'o Cc: Dominik Brodowski Cc: Sebastian Andrzej Siewior Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 395 ++++++++++++++++++++++++------------------ 1 file changed, 229 insertions(+), 166 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d3ad07bb8990d..626c53c21b7ea 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -67,63 +67,19 @@ * Exported interfaces ---- kernel output * -------------------------------------- * - * The primary kernel interface is + * The primary kernel interfaces are: * * void get_random_bytes(void *buf, int nbytes); - * - * This interface will return the requested number of random bytes, - * and place it in the requested buffer. This is equivalent to a - * read from /dev/urandom. - * - * For less critical applications, there are the functions: - * * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() * unsigned long get_random_long() * - * These are produced by a cryptographic RNG seeded from get_random_bytes, - * and so do not deplete the entropy pool as much. These are recommended - * for most in-kernel operations *if the result is going to be stored in - * the kernel*. - * - * Specifically, the get_random_int() family do not attempt to do - * "anti-backtracking". If you capture the state of the kernel (e.g. - * by snapshotting the VM), you can figure out previous get_random_int() - * return values. But if the value is stored in the kernel anyway, - * this is not a problem. - * - * It *is* safe to expose get_random_int() output to attackers (e.g. as - * network cookies); given outputs 1..n, it's not feasible to predict - * outputs 0 or n+1. The only concern is an attacker who breaks into - * the kernel later; the get_random_int() engine is not reseeded as - * often as the get_random_bytes() one. - * - * get_random_bytes() is needed for keys that need to stay secret after - * they are erased from the kernel. For example, any key that will - * be wrapped and stored encrypted. And session encryption keys: we'd - * like to know that after the session is closed and the keys erased, - * the plaintext is unrecoverable to someone who recorded the ciphertext. - * - * But for network ports/cookies, stack canaries, PRNG seeds, address - * space layout randomization, session *authentication* keys, or other - * applications where the sensitive data is stored in the kernel in - * plaintext for as long as it's sensitive, the get_random_int() family - * is just fine. - * - * Consider ASLR. We want to keep the address space secret from an - * outside attacker while the process is running, but once the address - * space is torn down, it's of no use to an attacker any more. And it's - * stored in kernel data structures as long as it's alive, so worrying - * about an attacker's ability to extrapolate it from the get_random_int() - * CRNG is silly. - * - * Even some cryptographic keys are safe to generate with get_random_int(). - * In particular, keys for SipHash are generally fine. Here, knowledge - * of the key authorizes you to do something to a kernel object (inject - * packets to a network connection, or flood a hash table), and the - * key is stored with the object being protected. Once it goes away, - * we no longer care if anyone knows the key. + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to a + * read from /dev/urandom. The get_random_{u32,u64,int,long}() family + * of functions may be higher performance for one-off random integers, + * because they do a bit of buffering. * * prandom_u32() * ------------- @@ -300,20 +256,6 @@ static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); -struct crng_state { - u32 state[16]; - unsigned long init_time; - spinlock_t lock; -}; - -static struct crng_state primary_crng = { - .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), - .state[0] = CHACHA_CONSTANT_EXPA, - .state[1] = CHACHA_CONSTANT_ND_3, - .state[2] = CHACHA_CONSTANT_2_BY, - .state[3] = CHACHA_CONSTANT_TE_K, -}; - /* * crng_init = 0 --> Uninitialized * 1 --> Initialized @@ -325,9 +267,6 @@ static struct crng_state primary_crng = { static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; -#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]); -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -470,7 +409,30 @@ static void credit_entropy_bits(int nbits) * *********************************************************************/ -#define CRNG_RESEED_INTERVAL (300 * HZ) +enum { + CRNG_RESEED_INTERVAL = 300 * HZ, + CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE +}; + +static struct { + u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); + unsigned long birth; + unsigned long generation; + spinlock_t lock; +} base_crng = { + .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) +}; + +struct crng { + u8 key[CHACHA_KEY_SIZE]; + unsigned long generation; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct crng, crngs) = { + .generation = ULONG_MAX, + .lock = INIT_LOCAL_LOCK(crngs.lock), +}; static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); @@ -487,22 +449,22 @@ static size_t crng_fast_load(const u8 *cp, size_t len) u8 *p; size_t ret = 0; - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + if (!spin_trylock_irqsave(&base_crng.lock, flags)) return 0; if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - p = (u8 *)&primary_crng.state[4]; + p = base_crng.key; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; + p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp; cp++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; } - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); if (crng_init == 1) pr_notice("fast init done\n"); return ret; @@ -527,14 +489,14 @@ static int crng_slow_load(const u8 *cp, size_t len) unsigned long flags; static u8 lfsr = 1; u8 tmp; - unsigned int i, max = CHACHA_KEY_SIZE; + unsigned int i, max = sizeof(base_crng.key); const u8 *src_buf = cp; - u8 *dest_buf = (u8 *)&primary_crng.state[4]; + u8 *dest_buf = base_crng.key; - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + if (!spin_trylock_irqsave(&base_crng.lock, flags)) return 0; if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } if (len > max) @@ -545,38 +507,50 @@ static int crng_slow_load(const u8 *cp, size_t len) lfsr >>= 1; if (tmp & 1) lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA_KEY_SIZE]; - dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + tmp = dest_buf[i % sizeof(base_crng.key)]; + dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr; lfsr += (tmp << 3) | (tmp >> 5); } - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 1; } static void crng_reseed(void) { unsigned long flags; - int i, entropy_count; - union { - u8 block[CHACHA_BLOCK_SIZE]; - u32 key[8]; - } buf; + int entropy_count; + unsigned long next_gen; + u8 key[CHACHA_KEY_SIZE]; + /* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, + * and then we drain all of it. Only then can we extract a new key. + */ do { entropy_count = READ_ONCE(input_pool.entropy_count); if (entropy_count < POOL_MIN_BITS) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf.key, sizeof(buf.key)); + extract_entropy(key, sizeof(key)); wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); - spin_lock_irqsave(&primary_crng.lock, flags); - for (i = 0; i < 8; i++) - primary_crng.state[i + 4] ^= buf.key[i]; - memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(primary_crng.init_time, jiffies); - spin_unlock_irqrestore(&primary_crng.lock, flags); + /* + * We copy the new key into the base_crng, overwriting the old one, + * and update the generation counter. We avoid hitting ULONG_MAX, + * because the per-cpu crngs are initialized to ULONG_MAX, so this + * forces new CPUs that come online to always initialize. + */ + spin_lock_irqsave(&base_crng.lock, flags); + memcpy(base_crng.key, key, sizeof(base_crng.key)); + next_gen = base_crng.generation + 1; + if (next_gen == ULONG_MAX) + ++next_gen; + WRITE_ONCE(base_crng.generation, next_gen); + WRITE_ONCE(base_crng.birth, jiffies); + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); + if (crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; @@ -597,77 +571,143 @@ static void crng_reseed(void) } } -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) +/* + * The general form here is based on a "fast key erasure RNG" from + * . It generates a ChaCha + * block using the provided key, and then immediately overwites that + * key with half the block. It returns the resultant ChaCha state to the + * user, along with the second half of the block containing 32 bytes of + * random data that may be used; random_data_len may not be greater than + * 32. + */ +static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], + u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) { - unsigned long flags, init_time; + u8 first_block[CHACHA_BLOCK_SIZE]; - if (crng_ready()) { - init_time = READ_ONCE(primary_crng.init_time); - if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(); - } - spin_lock_irqsave(&primary_crng.lock, flags); - chacha20_block(&primary_crng.state[0], out); - if (primary_crng.state[12] == 0) - primary_crng.state[13]++; - spin_unlock_irqrestore(&primary_crng.lock, flags); + BUG_ON(random_data_len > 32); + + chacha_init_consts(chacha_state); + memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); + memset(&chacha_state[12], 0, sizeof(u32) * 4); + chacha20_block(chacha_state, first_block); + + memcpy(key, first_block, CHACHA_KEY_SIZE); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memzero_explicit(first_block, sizeof(first_block)); } /* - * Use the leftover bytes from the CRNG block output (if there is - * enough) to mutate the CRNG key to provide backtracking protection. + * This function returns a ChaCha state that you may use for generating + * random data. It also returns up to 32 bytes on its own of random data + * that may be used; random_data_len may not be greater than 32. */ -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) { unsigned long flags; - u32 *s, *d; - int i; + struct crng *crng; - used = round_up(used, sizeof(u32)); - if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { - extract_crng(tmp); - used = 0; + BUG_ON(random_data_len > 32); + + /* + * For the fast path, we check whether we're ready, unlocked first, and + * then re-check once locked later. In the case where we're really not + * ready, we do fast key erasure with the base_crng directly, because + * this is what crng_{fast,slow}_load mutate during early init. + */ + if (unlikely(!crng_ready())) { + bool ready; + + spin_lock_irqsave(&base_crng.lock, flags); + ready = crng_ready(); + if (!ready) + crng_fast_key_erasure(base_crng.key, chacha_state, + random_data, random_data_len); + spin_unlock_irqrestore(&base_crng.lock, flags); + if (!ready) + return; } - spin_lock_irqsave(&primary_crng.lock, flags); - s = (u32 *)&tmp[used]; - d = &primary_crng.state[4]; - for (i = 0; i < 8; i++) - *d++ ^= *s++; - spin_unlock_irqrestore(&primary_crng.lock, flags); + + /* + * If the base_crng is more than 5 minutes old, we reseed, which + * in turn bumps the generation counter that we check below. + */ + if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL))) + crng_reseed(); + + local_lock_irqsave(&crngs.lock, flags); + crng = raw_cpu_ptr(&crngs); + + /* + * If our per-cpu crng is older than the base_crng, then it means + * somebody reseeded the base_crng. In that case, we do fast key + * erasure on the base_crng, and use its output as the new key + * for our per-cpu crng. This brings us up to date with base_crng. + */ + if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { + spin_lock(&base_crng.lock); + crng_fast_key_erasure(base_crng.key, chacha_state, + crng->key, sizeof(crng->key)); + crng->generation = base_crng.generation; + spin_unlock(&base_crng.lock); + } + + /* + * Finally, when we've made it this far, our per-cpu crng has an up + * to date key, and we can do fast key erasure with it to produce + * some random data and a ChaCha state for the caller. All other + * branches of this function are "unlikely", so most of the time we + * should wind up here immediately. + */ + crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); + local_unlock_irqrestore(&crngs.lock, flags); } -static ssize_t extract_crng_user(void __user *buf, size_t nbytes) +static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - int large_request = (nbytes > 256); + bool large_request = nbytes > 256; + ssize_t ret = 0, len; + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 output[CHACHA_BLOCK_SIZE]; + + if (!nbytes) + return 0; + + len = min_t(ssize_t, 32, nbytes); + crng_make_state(chacha_state, output, len); + + if (copy_to_user(buf, output, len)) + return -EFAULT; + nbytes -= len; + buf += len; + ret += len; while (nbytes) { if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; + if (signal_pending(current)) break; - } schedule(); } - extract_crng(tmp); - i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { + chacha20_block(chacha_state, output); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + + len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE); + if (copy_to_user(buf, output, len)) { ret = -EFAULT; break; } - nbytes -= i; - buf += i; - ret += i; + nbytes -= len; + buf += len; + ret += len; } - crng_backtrack_protect(tmp, i); - - /* Wipe data just written to memory */ - memzero_explicit(tmp, sizeof(tmp)); + memzero_explicit(chacha_state, sizeof(chacha_state)); + memzero_explicit(output, sizeof(output)); return ret; } @@ -976,23 +1016,36 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void */ static void _get_random_bytes(void *buf, int nbytes) { - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 tmp[CHACHA_BLOCK_SIZE]; + ssize_t len; trace_get_random_bytes(nbytes, _RET_IP_); - while (nbytes >= CHACHA_BLOCK_SIZE) { - extract_crng(buf); - buf += CHACHA_BLOCK_SIZE; + if (!nbytes) + return; + + len = min_t(ssize_t, 32, nbytes); + crng_make_state(chacha_state, buf, len); + nbytes -= len; + buf += len; + + while (nbytes) { + if (nbytes < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; nbytes -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; } - if (nbytes > 0) { - extract_crng(tmp); - memcpy(buf, tmp, nbytes); - crng_backtrack_protect(tmp, nbytes); - } else - crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); - memzero_explicit(tmp, sizeof(tmp)); + memzero_explicit(chacha_state, sizeof(chacha_state)); } void get_random_bytes(void *buf, int nbytes) @@ -1223,13 +1276,12 @@ int __init rand_initialize(void) mix_pool_bytes(&now, sizeof(now)); mix_pool_bytes(utsname(), sizeof(*(utsname()))); - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + extract_entropy(base_crng.key, sizeof(base_crng.key)); if (arch_init && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } - primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; if (ratelimit_disable) { urandom_warning.interval = 0; @@ -1261,7 +1313,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, int ret; nbytes = min_t(size_t, nbytes, INT_MAX >> 6); - ret = extract_crng_user(buf, nbytes); + ret = get_random_bytes_user(buf, nbytes); trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); return ret; } @@ -1567,8 +1619,15 @@ static atomic_t batch_generation = ATOMIC_INIT(0); struct batched_entropy { union { - u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; + /* + * We make this 1.5x a ChaCha block, so that we get the + * remaining 32 bytes from fast key erasure, plus one full + * block from the detached ChaCha state. We can increase + * the size of this later if needed so long as we keep the + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. + */ + u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; + u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; }; local_lock_t lock; unsigned int position; @@ -1577,14 +1636,13 @@ struct batched_entropy { /* * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom, but there is no backtrack protection, with - * the goal of being quite fast and not depleting entropy. In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once at any - * point prior. + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock) + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), + .position = UINT_MAX }; u64 get_random_u64(void) @@ -1601,21 +1659,24 @@ u64 get_random_u64(void) batch = raw_cpu_ptr(&batched_entropy_u64); next_gen = atomic_read(&batch_generation); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 || + if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || next_gen != batch->generation) { - extract_crng((u8 *)batch->entropy_u64); + _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); batch->position = 0; batch->generation = next_gen; } - ret = batch->entropy_u64[batch->position++]; + ret = batch->entropy_u64[batch->position]; + batch->entropy_u64[batch->position] = 0; + ++batch->position; local_unlock_irqrestore(&batched_entropy_u64.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock) + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), + .position = UINT_MAX }; u32 get_random_u32(void) @@ -1632,14 +1693,16 @@ u32 get_random_u32(void) batch = raw_cpu_ptr(&batched_entropy_u32); next_gen = atomic_read(&batch_generation); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 || + if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || next_gen != batch->generation) { - extract_crng((u8 *)batch->entropy_u32); + _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); batch->position = 0; batch->generation = next_gen; } - ret = batch->entropy_u32[batch->position++]; + ret = batch->entropy_u32[batch->position]; + batch->entropy_u32[batch->position] = 0; + ++batch->position; local_unlock_irqrestore(&batched_entropy_u32.lock, flags); return ret; } -- GitLab From ba789caa17a051425e311592344d9b7841790c2d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 19:23:17 +0100 Subject: [PATCH 0187/4335] random: use hash function for crng_slow_load() commit 66e4c2b9541503d721e936cc3898c9f25f4591ff upstream. Since we have a hash function that's really fast, and the goal of crng_slow_load() is reportedly to "touch all of the crng's state", we can just hash the old state together with the new state and call it a day. This way we dont need to reason about another LFSR or worry about various attacks there. This code is only ever used at early boot and then never again. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 626c53c21b7ea..866b855c16090 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -477,42 +477,30 @@ static size_t crng_fast_load(const u8 *cp, size_t len) * all), and (2) it doesn't have the performance constraints of * crng_fast_load(). * - * So we do something more comprehensive which is guaranteed to touch - * all of the primary_crng's state, and which uses a LFSR with a - * period of 255 as part of the mixing algorithm. Finally, we do - * *not* advance crng_init_cnt since buffer we may get may be something - * like a fixed DMI table (for example), which might very well be - * unique to the machine, but is otherwise unvarying. + * So, we simply hash the contents in with the current key. Finally, + * we do *not* advance crng_init_cnt since buffer we may get may be + * something like a fixed DMI table (for example), which might very + * well be unique to the machine, but is otherwise unvarying. */ -static int crng_slow_load(const u8 *cp, size_t len) +static void crng_slow_load(const u8 *cp, size_t len) { unsigned long flags; - static u8 lfsr = 1; - u8 tmp; - unsigned int i, max = sizeof(base_crng.key); - const u8 *src_buf = cp; - u8 *dest_buf = base_crng.key; + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; + return; if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; - } - if (len > max) - max = len; - - for (i = 0; i < max; i++) { - tmp = lfsr; - lfsr >>= 1; - if (tmp & 1) - lfsr ^= 0xE1; - tmp = dest_buf[i % sizeof(base_crng.key)]; - dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr; - lfsr += (tmp << 3) | (tmp >> 5); + return; } + + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, cp, len); + blake2s_final(&hash, base_crng.key); + spin_unlock_irqrestore(&base_crng.lock, flags); - return 1; } static void crng_reseed(void) -- GitLab From 5dd066ade8207a484b12b81c5315f90ae3785b9c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 14:43:25 +0100 Subject: [PATCH 0188/4335] random: make more consistent use of integer types commit 04ec96b768c9dd43946b047c3da60dcc66431370 upstream. We've been using a flurry of int, unsigned int, size_t, and ssize_t. Let's unify all of this into size_t where it makes sense, as it does in most places, and leave ssize_t for return values with possible errors. In addition, keeping with the convention of other functions in this file, functions that are dealing with raw bytes now take void * consistently instead of a mix of that and u8 *, because much of the time we're actually passing some other structure that is then interpreted as bytes by the function. We also take the opportunity to fix the outdated and incorrect comment in get_random_bytes_arch(). Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 123 +++++++++++++++------------------- include/linux/hw_random.h | 2 +- include/linux/random.h | 10 +-- include/trace/events/random.h | 79 +++++++++++----------- 4 files changed, 99 insertions(+), 115 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 866b855c16090..357fefa23b962 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -69,7 +69,7 @@ * * The primary kernel interfaces are: * - * void get_random_bytes(void *buf, int nbytes); + * void get_random_bytes(void *buf, size_t nbytes); * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() @@ -97,14 +97,14 @@ * The current exported interfaces for gathering environmental noise * from the devices are: * - * void add_device_randomness(const void *buf, unsigned int size); + * void add_device_randomness(const void *buf, size_t size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); * void add_interrupt_randomness(int irq); * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const char *buffer, size_t count, + * void add_hwgenerator_randomness(const void *buffer, size_t count, * size_t entropy); - * void add_bootloader_randomness(const void *buf, unsigned int size); + * void add_bootloader_randomness(const void *buf, size_t size); * * add_device_randomness() is for adding data to the random pool that * is likely to differ between two devices (or possibly even per boot). @@ -268,7 +268,7 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, int nbytes); +static void _get_random_bytes(void *buf, size_t nbytes); static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); @@ -290,7 +290,7 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); static struct { struct blake2s_state hash; spinlock_t lock; - int entropy_count; + unsigned int entropy_count; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, @@ -308,18 +308,12 @@ static void crng_reseed(void); * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. */ -static void _mix_pool_bytes(const void *in, int nbytes) +static void _mix_pool_bytes(const void *in, size_t nbytes) { blake2s_update(&input_pool.hash, in, nbytes); } -static void __mix_pool_bytes(const void *in, int nbytes) -{ - trace_mix_pool_bytes_nolock(nbytes, _RET_IP_); - _mix_pool_bytes(in, nbytes); -} - -static void mix_pool_bytes(const void *in, int nbytes) +static void mix_pool_bytes(const void *in, size_t nbytes) { unsigned long flags; @@ -383,18 +377,18 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_list_lock, flags); } -static void credit_entropy_bits(int nbits) +static void credit_entropy_bits(size_t nbits) { - int entropy_count, orig; + unsigned int entropy_count, orig, add; - if (nbits <= 0) + if (!nbits) return; - nbits = min(nbits, POOL_BITS); + add = min_t(size_t, nbits, POOL_BITS); do { orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min(POOL_BITS, orig + nbits); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); @@ -443,10 +437,10 @@ static void invalidate_batched_entropy(void); * path. So we can't afford to dilly-dally. Returns the number of * bytes processed from cp. */ -static size_t crng_fast_load(const u8 *cp, size_t len) +static size_t crng_fast_load(const void *cp, size_t len) { unsigned long flags; - u8 *p; + const u8 *src = (const u8 *)cp; size_t ret = 0; if (!spin_trylock_irqsave(&base_crng.lock, flags)) @@ -455,10 +449,9 @@ static size_t crng_fast_load(const u8 *cp, size_t len) spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - p = base_crng.key; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp; - cp++; crng_init_cnt++; len--; ret++; + base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; + src++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); @@ -482,7 +475,7 @@ static size_t crng_fast_load(const u8 *cp, size_t len) * something like a fixed DMI table (for example), which might very * well be unique to the machine, but is otherwise unvarying. */ -static void crng_slow_load(const u8 *cp, size_t len) +static void crng_slow_load(const void *cp, size_t len) { unsigned long flags; struct blake2s_state hash; @@ -656,14 +649,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { bool large_request = nbytes > 256; - ssize_t ret = 0, len; + ssize_t ret = 0; + size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; if (!nbytes) return 0; - len = min_t(ssize_t, 32, nbytes); + len = min_t(size_t, 32, nbytes); crng_make_state(chacha_state, output, len); if (copy_to_user(buf, output, len)) @@ -683,7 +677,7 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE); + len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); if (copy_to_user(buf, output, len)) { ret = -EFAULT; break; @@ -721,7 +715,7 @@ struct timer_rand_state { * the entropy pool having similar initial state across largely * identical devices. */ -void add_device_randomness(const void *buf, unsigned int size) +void add_device_randomness(const void *buf, size_t size) { unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; @@ -749,7 +743,7 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; * keyboard scan codes, and 256 upwards for interrupts. * */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned num) +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { struct { long jiffies; @@ -793,7 +787,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(min_t(int, fls(delta >> 1), 11)); + credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, @@ -874,8 +868,8 @@ void add_interrupt_randomness(int irq) add_interrupt_bench(cycles); if (unlikely(crng_init == 0)) { - if ((fast_pool->count >= 64) && - crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { + if (fast_pool->count >= 64 && + crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { @@ -893,7 +887,7 @@ void add_interrupt_randomness(int irq) return; fast_pool->last = now; - __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); spin_unlock(&input_pool.lock); fast_pool->count = 0; @@ -1002,18 +996,18 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ -static void _get_random_bytes(void *buf, int nbytes) +static void _get_random_bytes(void *buf, size_t nbytes) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; - ssize_t len; + size_t len; trace_get_random_bytes(nbytes, _RET_IP_); if (!nbytes) return; - len = min_t(ssize_t, 32, nbytes); + len = min_t(size_t, 32, nbytes); crng_make_state(chacha_state, buf, len); nbytes -= len; buf += len; @@ -1036,7 +1030,7 @@ static void _get_random_bytes(void *buf, int nbytes) memzero_explicit(chacha_state, sizeof(chacha_state)); } -void get_random_bytes(void *buf, int nbytes) +void get_random_bytes(void *buf, size_t nbytes) { static void *previous; @@ -1197,25 +1191,19 @@ EXPORT_SYMBOL(del_random_ready_callback); /* * This function will use the architecture-specific hardware random - * number generator if it is available. The arch-specific hw RNG will - * almost certainly be faster than what we can do in software, but it - * is impossible to verify that it is implemented securely (as - * opposed, to, say, the AES encryption of a sequence number using a - * key known by the NSA). So it's useful if we need the speed, but - * only if we're willing to trust the hardware manufacturer not to - * have put in a back door. - * - * Return number of bytes filled in. + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. */ -int __must_check get_random_bytes_arch(void *buf, int nbytes) +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) { - int left = nbytes; + size_t left = nbytes; u8 *p = buf; trace_get_random_bytes_arch(left, _RET_IP_); while (left) { unsigned long v; - int chunk = min_t(int, left, sizeof(unsigned long)); + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); if (!arch_get_random_long(&v)) break; @@ -1248,12 +1236,12 @@ early_param("random.trust_cpu", parse_trust_cpu); */ int __init rand_initialize(void) { - int i; + size_t i; ktime_t now = ktime_get_real(); bool arch_init = true; unsigned long rv; - for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { rv = random_get_entropy(); @@ -1302,7 +1290,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = get_random_bytes_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); + trace_urandom_read(nbytes, input_pool.entropy_count); return ret; } @@ -1346,19 +1334,18 @@ static __poll_t random_poll(struct file *file, poll_table *wait) return mask; } -static int write_pool(const char __user *buffer, size_t count) +static int write_pool(const char __user *ubuf, size_t count) { - size_t bytes; - u8 buf[BLAKE2S_BLOCK_SIZE]; - const char __user *p = buffer; + size_t len; + u8 block[BLAKE2S_BLOCK_SIZE]; - while (count > 0) { - bytes = min(count, sizeof(buf)); - if (copy_from_user(buf, p, bytes)) + while (count) { + len = min(count, sizeof(block)); + if (copy_from_user(block, ubuf, len)) return -EFAULT; - count -= bytes; - p += bytes; - mix_pool_bytes(buf, bytes); + count -= len; + ubuf += len; + mix_pool_bytes(block, len); cond_resched(); } @@ -1368,7 +1355,7 @@ static int write_pool(const char __user *buffer, size_t count) static ssize_t random_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - size_t ret; + int ret; ret = write_pool(buffer, count); if (ret) @@ -1464,8 +1451,6 @@ const struct file_operations urandom_fops = { SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) { - int ret; - if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; @@ -1480,6 +1465,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, count = INT_MAX; if (!(flags & GRND_INSECURE) && !crng_ready()) { + int ret; + if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); @@ -1741,7 +1728,7 @@ unsigned long randomize_page(unsigned long start, unsigned long range) * Those devices may produce endless random bits and will be throttled * when our pool is full. */ -void add_hwgenerator_randomness(const char *buffer, size_t count, +void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { @@ -1772,7 +1759,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); * it would be regarded as device data. * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. */ -void add_bootloader_randomness(const void *buf, unsigned int size) +void add_bootloader_randomness(const void *buf, size_t size) { if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) add_hwgenerator_randomness(buf, size, size * 8); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da216..1a9fc38f8938c 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -61,6 +61,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); /** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); +extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51fb..e92efb39779cc 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -20,8 +20,8 @@ struct random_ready_callback { struct module *owner; }; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +extern void add_device_randomness(const void *, size_t); +extern void add_bootloader_randomness(const void *, size_t); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -37,13 +37,13 @@ extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; -extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; @@ -87,7 +87,7 @@ static inline unsigned long get_random_canary(void) /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index ad149aeaf42c5..0609a2810a120 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -9,13 +9,13 @@ #include TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -23,18 +23,18 @@ TRACE_EVENT(add_device_randomness, __entry->IP = IP; ), - TP_printk("bytes %d caller %pS", + TP_printk("bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -42,12 +42,12 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes, __entry->IP = IP; ), - TP_printk("input pool: bytes %d caller %pS", + TP_printk("input pool: bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP) ); @@ -59,13 +59,13 @@ DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(int bits, int entropy_count, unsigned long IP), + TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( int, bits ) - __field( int, entropy_count ) + __field(size_t, bits ) + __field(size_t, entropy_count ) __field(unsigned long, IP ) ), @@ -75,34 +75,34 @@ TRACE_EVENT(credit_entropy_bits, __entry->IP = IP; ), - TP_printk("input pool: bits %d entropy_count %d caller %pS", + TP_printk("input pool: bits %zu entropy_count %zu caller %pS", __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), + TP_PROTO(size_t input_bits), TP_ARGS(input_bits), TP_STRUCT__entry( - __field( int, input_bits ) + __field(size_t, input_bits ) ), TP_fast_assign( __entry->input_bits = input_bits; ), - TP_printk("input_pool_bits %d", __entry->input_bits) + TP_printk("input_pool_bits %zu", __entry->input_bits) ); TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), + TP_PROTO(dev_t dev, size_t input_bits), TP_ARGS(dev, input_bits), TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) + __field(dev_t, dev ) + __field(size_t, input_bits ) ), TP_fast_assign( @@ -110,17 +110,17 @@ TRACE_EVENT(add_disk_randomness, __entry->input_bits = input_bits; ), - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), + TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->input_bits) ); DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP), TP_STRUCT__entry( - __field( int, nbytes ) + __field(size_t, nbytes ) __field(unsigned long, IP ) ), @@ -129,29 +129,29 @@ DECLARE_EVENT_CLASS(random__get_random_bytes, __entry->IP = IP; ), - TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) + TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, nbytes ) - __field( int, entropy_count ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( @@ -159,37 +159,34 @@ DECLARE_EVENT_CLASS(random__extract_entropy, __entry->entropy_count = entropy_count; ), - TP_printk("input pool: nbytes %d entropy_count %d", + TP_printk("input pool: nbytes %zu entropy_count %zu", __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), + TP_PROTO(size_t nbytes, size_t entropy_count), - TP_ARGS(got_bits, pool_left, input_left), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; + __entry->nbytes = nbytes; + __entry->entropy_count = entropy_count; ), - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", __entry->got_bits, - __entry->pool_left, __entry->input_left) + TP_printk("reading: nbytes %zu entropy_count %zu", + __entry->nbytes, __entry->entropy_count) ); TRACE_EVENT(prandom_u32, -- GitLab From eda555356b39cab00e9c15aa3258786c568cfb3a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Feb 2022 23:37:13 +0100 Subject: [PATCH 0189/4335] random: remove outdated INT_MAX >> 6 check in urandom_read() commit 434537ae54ad37e93555de21b6ac8133d6d773a9 upstream. In 79a8468747c5 ("random: check for increase of entropy_count because of signed conversion"), a number of checks were added around what values were passed to account(), because account() was doing fancy fixed point fractional arithmetic, and a user had some ability to pass large values directly into it. One of things in that commit was limiting those values to INT_MAX >> 6. The first >> 3 was for bytes to bits, and the next >> 3 was for bits to 1/8 fractional bits. However, for several years now, urandom reads no longer touch entropy accounting, and so this check serves no purpose. The current flow is: urandom_read_nowarn()-->get_random_bytes_user()-->chacha20_block() Of course, we don't want that size_t to be truncated when adding it into the ssize_t. But we arrive at urandom_read_nowarn() in the first place either via ordinary fops, which limits reads to MAX_RW_COUNT, or via getrandom() which limits reads to INT_MAX. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 357fefa23b962..88f55921757f1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1286,9 +1286,8 @@ void rand_initialize_disk(struct gendisk *disk) static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - int ret; + ssize_t ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = get_random_bytes_user(buf, nbytes); trace_urandom_read(nbytes, input_pool.entropy_count); return ret; -- GitLab From d2d1ad9430b1e23aa2f82bca561e85868c519383 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 18:42:13 +0100 Subject: [PATCH 0190/4335] random: zero buffer after reading entropy from userspace commit 7b5164fb1279bf0251371848e40bae646b59b3a8 upstream. This buffer may contain entropic data that shouldn't stick around longer than needed, so zero out the temporary buffer at the end of write_pool(). Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 88f55921757f1..6108403564079 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1336,19 +1336,24 @@ static __poll_t random_poll(struct file *file, poll_table *wait) static int write_pool(const char __user *ubuf, size_t count) { size_t len; + int ret = 0; u8 block[BLAKE2S_BLOCK_SIZE]; while (count) { len = min(count, sizeof(block)); - if (copy_from_user(block, ubuf, len)) - return -EFAULT; + if (copy_from_user(block, ubuf, len)) { + ret = -EFAULT; + goto out; + } count -= len; ubuf += len; mix_pool_bytes(block, len); cond_resched(); } - return 0; +out: + memzero_explicit(block, sizeof(block)); + return ret; } static ssize_t random_write(struct file *file, const char __user *buffer, -- GitLab From 86ab60616ec6827b98b0f2c6ee96fcca77c018e2 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 9 Feb 2022 19:57:06 +0100 Subject: [PATCH 0191/4335] random: fix locking for crng_init in crng_reseed() commit 7191c628fe07b70d3f37de736d173d1b115396ed upstream. crng_init is protected by primary_crng->lock. Therefore, we need to hold this lock when increasing crng_init to 2. As we shouldn't hold this lock for too long, only hold it for those parts which require protection. Signed-off-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6108403564079..1da8687610ea6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -502,6 +502,7 @@ static void crng_reseed(void) int entropy_count; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; + bool finalize_init = false; /* * First we make sure we have POOL_MIN_BITS of entropy in the pool, @@ -529,12 +530,14 @@ static void crng_reseed(void) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - spin_unlock_irqrestore(&base_crng.lock, flags); - memzero_explicit(key, sizeof(key)); - if (crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; + finalize_init = true; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); + if (finalize_init) { process_random_ready_list(); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); -- GitLab From ad5d17d0faf93cde620af933045c267339c578c1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 22:46:48 +0100 Subject: [PATCH 0192/4335] random: tie batched entropy generation to base_crng generation commit 0791e8b655cc373718f0f58800fdc625a3447ac5 upstream. Now that we have an explicit base_crng generation counter, we don't need a separate one for batched entropy. Rather, we can just move the generation forward every time we change crng_init state or update the base_crng key. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1da8687610ea6..14574a211951c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -430,8 +430,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = { static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -static void invalidate_batched_entropy(void); - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -454,7 +452,7 @@ static size_t crng_fast_load(const void *cp, size_t len) src++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - invalidate_batched_entropy(); + ++base_crng.generation; crng_init = 1; } spin_unlock_irqrestore(&base_crng.lock, flags); @@ -531,7 +529,6 @@ static void crng_reseed(void) WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); if (crng_init < 2) { - invalidate_batched_entropy(); crng_init = 2; finalize_init = true; } @@ -1256,8 +1253,9 @@ int __init rand_initialize(void) mix_pool_bytes(utsname(), sizeof(*(utsname()))); extract_entropy(base_crng.key, sizeof(base_crng.key)); + ++base_crng.generation; + if (arch_init && trust_cpu && crng_init < 2) { - invalidate_batched_entropy(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } @@ -1597,8 +1595,6 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -static atomic_t batch_generation = ATOMIC_INIT(0); - struct batched_entropy { union { /* @@ -1612,8 +1608,8 @@ struct batched_entropy { u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; }; local_lock_t lock; + unsigned long generation; unsigned int position; - int generation; }; /* @@ -1633,14 +1629,14 @@ u64 get_random_u64(void) unsigned long flags; struct batched_entropy *batch; static void *previous; - int next_gen; + unsigned long next_gen; warn_unseeded_randomness(&previous); local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); - next_gen = atomic_read(&batch_generation); + next_gen = READ_ONCE(base_crng.generation); if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || next_gen != batch->generation) { _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); @@ -1667,14 +1663,14 @@ u32 get_random_u32(void) unsigned long flags; struct batched_entropy *batch; static void *previous; - int next_gen; + unsigned long next_gen; warn_unseeded_randomness(&previous); local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); - next_gen = atomic_read(&batch_generation); + next_gen = READ_ONCE(base_crng.generation); if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || next_gen != batch->generation) { _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); @@ -1690,15 +1686,6 @@ u32 get_random_u32(void) } EXPORT_SYMBOL(get_random_u32); -/* It's important to invalidate all potential batched entropy that might - * be stored before the crng is initialized, which we can do lazily by - * bumping the generation counter. - */ -static void invalidate_batched_entropy(void) -{ - atomic_inc(&batch_generation); -} - /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. -- GitLab From 4509941f75ac8d4fc4a798d03ade86d15eb10caf Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:35:24 +0100 Subject: [PATCH 0193/4335] random: remove ifdef'd out interrupt bench commit 95e6060c20a7f5db60163274c5222a725ac118f9 upstream. With tools like kbench9000 giving more finegrained responses, and this basically never having been used ever since it was initially added, let's just get rid of this. There *is* still work to be done on the interrupt handler, but this really isn't the way it's being developed. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 9 ----- drivers/char/random.c | 40 --------------------- 2 files changed, 49 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index e568f32bc3062..9df263c85aabe 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1031,15 +1031,6 @@ This is a directory, with the following entries: are woken up. This file is writable for compatibility purposes, but writing to it has no effect on any RNG behavior. -If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` -defined, these additional entries are present: - -* ``add_interrupt_avg_cycles``: the average number of cycles between - interrupts used to feed the pool; - -* ``add_interrupt_avg_deviation``: the standard deviation seen on the - number of cycles between interrupts used to feed the pool. - randomize_va_space ================== diff --git a/drivers/char/random.c b/drivers/char/random.c index 14574a211951c..d1d01be190ccc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -240,8 +240,6 @@ #define CREATE_TRACE_POINTS #include -/* #define ADD_INTERRUPT_BENCH */ - enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ @@ -808,27 +806,6 @@ EXPORT_SYMBOL_GPL(add_input_randomness); static DEFINE_PER_CPU(struct fast_pool, irq_randomness); -#ifdef ADD_INTERRUPT_BENCH -static unsigned long avg_cycles, avg_deviation; - -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT - 1)) - -static void add_interrupt_bench(cycles_t start) -{ - long delta = random_get_entropy() - start; - - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; -} -#else -#define add_interrupt_bench(x) -#endif - static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { u32 *ptr = (u32 *)regs; @@ -865,7 +842,6 @@ void add_interrupt_randomness(int irq) (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); fast_mix(fast_pool); - add_interrupt_bench(cycles); if (unlikely(crng_init == 0)) { if (fast_pool->count >= 64 && @@ -1575,22 +1551,6 @@ struct ctl_table random_table[] = { .mode = 0444, .proc_handler = proc_do_uuid, }, -#ifdef ADD_INTERRUPT_BENCH - { - .procname = "add_interrupt_avg_cycles", - .data = &avg_cycles, - .maxlen = sizeof(avg_cycles), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "add_interrupt_avg_deviation", - .data = &avg_deviation, - .maxlen = sizeof(avg_deviation), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, -#endif { } }; #endif /* CONFIG_SYSCTL */ -- GitLab From ce951e9672df144b15540de6cf81d4b1f2ce7d39 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:40:44 +0100 Subject: [PATCH 0194/4335] random: remove unused tracepoints commit 14c174633f349cb41ea90c2c0aaddac157012f74 upstream. These explicit tracepoints aren't really used and show sign of aging. It's work to keep these up to date, and before I attempted to keep them up to date, they weren't up to date, which indicates that they're not really used. These days there are better ways of introspecting anyway. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 30 +---- include/trace/events/random.h | 212 ---------------------------------- lib/random32.c | 3 +- 3 files changed, 4 insertions(+), 241 deletions(-) delete mode 100644 include/trace/events/random.h diff --git a/drivers/char/random.c b/drivers/char/random.c index d1d01be190ccc..b5889b52a251e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -237,9 +237,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ @@ -315,7 +312,6 @@ static void mix_pool_bytes(const void *in, size_t nbytes) { unsigned long flags; - trace_mix_pool_bytes(nbytes, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(in, nbytes); spin_unlock_irqrestore(&input_pool.lock, flags); @@ -389,8 +385,6 @@ static void credit_entropy_bits(size_t nbits) entropy_count = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) crng_reseed(); } @@ -721,7 +715,6 @@ void add_device_randomness(const void *buf, size_t size) if (!crng_ready() && size) crng_slow_load(buf, size); - trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, size); _mix_pool_bytes(&time, sizeof(time)); @@ -800,7 +793,6 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -880,7 +872,6 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -905,8 +896,6 @@ static void extract_entropy(void *buf, size_t nbytes) } block; size_t i; - trace_extract_entropy(nbytes, input_pool.entropy_count); - for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { if (!arch_get_random_seed_long(&block.rdseed[i]) && !arch_get_random_long(&block.rdseed[i])) @@ -978,8 +967,6 @@ static void _get_random_bytes(void *buf, size_t nbytes) u8 tmp[CHACHA_BLOCK_SIZE]; size_t len; - trace_get_random_bytes(nbytes, _RET_IP_); - if (!nbytes) return; @@ -1176,7 +1163,6 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) size_t left = nbytes; u8 *p = buf; - trace_get_random_bytes_arch(left, _RET_IP_); while (left) { unsigned long v; size_t chunk = min_t(size_t, left, sizeof(unsigned long)); @@ -1260,16 +1246,6 @@ void rand_initialize_disk(struct gendisk *disk) } #endif -static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) -{ - ssize_t ret; - - ret = get_random_bytes_user(buf, nbytes); - trace_urandom_read(nbytes, input_pool.entropy_count); - return ret; -} - static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1282,7 +1258,7 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, current->comm, nbytes); } - return urandom_read_nowarn(file, buf, nbytes, ppos); + return get_random_bytes_user(buf, nbytes); } static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, @@ -1293,7 +1269,7 @@ static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, ret = wait_for_random_bytes(); if (ret != 0) return ret; - return urandom_read_nowarn(file, buf, nbytes, ppos); + return get_random_bytes_user(buf, nbytes); } static __poll_t random_poll(struct file *file, poll_table *wait) @@ -1454,7 +1430,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if (unlikely(ret)) return ret; } - return urandom_read_nowarn(NULL, buf, count, NULL); + return get_random_bytes_user(buf, count); } /******************************************************************** diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index 0609a2810a120..0000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("input pool: bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), - - TP_ARGS(bits, entropy_count, IP), - - TP_STRUCT__entry( - __field(size_t, bits ) - __field(size_t, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bits = bits; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("input pool: bits %zu entropy_count %zu caller %pS", - __entry->bits, __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(size_t input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %zu", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, size_t input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field(size_t, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("input pool: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("reading: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - -TRACE_EVENT(prandom_u32, - - TP_PROTO(unsigned int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( unsigned int, ret) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret=%u" , __entry->ret) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include diff --git a/lib/random32.c b/lib/random32.c index 4d0e05e471d72..3c19820796d04 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,8 +39,8 @@ #include #include #include +#include #include -#include /** * prandom_u32_state - seeded pseudo-random number generator. @@ -386,7 +386,6 @@ u32 prandom_u32(void) struct siprand_state *state = get_cpu_ptr(&net_rand_state); u32 res = siprand_u32(state); - trace_prandom_u32(res); put_cpu_ptr(&net_rand_state); return res; } -- GitLab From f641f9495d6fdf55a7da565a76130896f492e61a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:43:57 +0100 Subject: [PATCH 0195/4335] random: add proper SPDX header commit a07fdae346c35c6ba286af1c88e0effcfa330bf9 upstream. Convert the current license into the SPDX notation of "(GPL-2.0 OR BSD-3-Clause)". This infers GPL-2.0 from the text "ALTERNATIVELY, this product may be distributed under the terms of the GNU General Public License, in which case the provisions of the GPL are required INSTEAD OF the above restrictions" and it infers BSD-3-Clause from the verbatim BSD 3 clause license in the file. Cc: Thomas Gleixner Cc: Theodore Ts'o Cc: Dominik Brodowski Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b5889b52a251e..ef9c96d8bdfd5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,44 +1,9 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * random.c -- A strong random number generator - * * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. - * * Copyright Matt Mackall , 2003, 2004, 2005 - * * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All * rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, and the entire permission notice in its entirety, - * including the disclaimer of warranties. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * ALTERNATIVELY, this product may be distributed under the terms of - * the GNU General Public License, in which case the provisions of the GPL are - * required INSTEAD OF the above restrictions. (This clause is - * necessary due to a potential bad interaction between the GPL and - * the restrictions contained in a BSD-style copyright.) - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF - * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. */ /* -- GitLab From c807eca035c3f8bd550172ee55238856adc40296 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 17:01:27 +0100 Subject: [PATCH 0196/4335] random: deobfuscate irq u32/u64 contributions commit b2f408fe403800c91a49f6589d95b6759ce1b30b upstream. In the irq handler, we fill out 16 bytes differently on 32-bit and 64-bit platforms, and for 32-bit vs 64-bit cycle counters, which doesn't always correspond with the bitness of the platform. Whether or not you like this strangeness, it is a matter of fact. But it might not be a fact you well realized until now, because the code that loaded the irq info into 4 32-bit words was quite confusing. Instead, this commit makes everything explicit by having separate (compile-time) branches for 32-bit and 64-bit types. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 49 ++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ef9c96d8bdfd5..291b7e8d23938 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -283,7 +283,10 @@ static void mix_pool_bytes(const void *in, size_t nbytes) } struct fast_pool { - u32 pool[4]; + union { + u32 pool32[4]; + u64 pool64[2]; + }; unsigned long last; u16 reg_idx; u8 count; @@ -294,10 +297,10 @@ struct fast_pool { * collector. It's hardcoded for an 128 bit pool and assumes that any * locks that might be needed are taken by the caller. */ -static void fast_mix(struct fast_pool *f) +static void fast_mix(u32 pool[4]) { - u32 a = f->pool[0], b = f->pool[1]; - u32 c = f->pool[2], d = f->pool[3]; + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; a += b; c += d; b = rol32(b, 6); d = rol32(d, 27); @@ -315,9 +318,8 @@ static void fast_mix(struct fast_pool *f) b = rol32(b, 16); d = rol32(d, 14); d ^= a; b ^= c; - f->pool[0] = a; f->pool[1] = b; - f->pool[2] = c; f->pool[3] = d; - f->count++; + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; } static void process_random_ready_list(void) @@ -784,29 +786,34 @@ void add_interrupt_randomness(int irq) struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); - u32 c_high, j_high; - u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); - c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; - j_high = (sizeof(now) > 4) ? now >> 32 : 0; - fast_pool->pool[0] ^= cycles ^ j_high ^ irq; - fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= - (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); - fast_mix(fast_pool); + if (sizeof(cycles) == 8) + fast_pool->pool64[0] ^= cycles ^ rol64(now, 32) ^ irq; + else { + fast_pool->pool32[0] ^= cycles ^ irq; + fast_pool->pool32[1] ^= now; + } + + if (sizeof(unsigned long) == 8) + fast_pool->pool64[1] ^= regs ? instruction_pointer(regs) : _RET_IP_; + else { + fast_pool->pool32[2] ^= regs ? instruction_pointer(regs) : _RET_IP_; + fast_pool->pool32[3] ^= get_reg(fast_pool, regs); + } + + fast_mix(fast_pool->pool32); + ++fast_pool->count; if (unlikely(crng_init == 0)) { if (fast_pool->count >= 64 && - crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) { + crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { - _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); spin_unlock(&input_pool.lock); } } @@ -820,7 +827,7 @@ void add_interrupt_randomness(int irq) return; fast_pool->last = now; - _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); spin_unlock(&input_pool.lock); fast_pool->count = 0; -- GitLab From 95f85a01584e454b540332c860d23ffd622d1b4c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:19:49 +0100 Subject: [PATCH 0197/4335] random: introduce drain_entropy() helper to declutter crng_reseed() commit 246c03dd899164d0186b6d685d6387f228c28d93 upstream. In preparation for separating responsibilities, break out the entropy count management part of crng_reseed() into its own function. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 291b7e8d23938..d7ae07da8878d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -260,6 +260,7 @@ static struct { }; static void extract_entropy(void *buf, size_t nbytes); +static bool drain_entropy(void *buf, size_t nbytes); static void crng_reseed(void); @@ -456,23 +457,13 @@ static void crng_slow_load(const void *cp, size_t len) static void crng_reseed(void) { unsigned long flags; - int entropy_count; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; bool finalize_init = false; - /* - * First we make sure we have POOL_MIN_BITS of entropy in the pool, - * and then we drain all of it. Only then can we extract a new key. - */ - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(key, sizeof(key)); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); + /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */ + if (!drain_entropy(key, sizeof(key))) + return; /* * We copy the new key into the base_crng, overwriting the old one, @@ -900,6 +891,25 @@ static void extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } +/* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we + * set the entropy count to zero (but don't actually touch any data). Only then + * can we extract a new key with extract_entropy(). + */ +static bool drain_entropy(void *buf, size_t nbytes) +{ + unsigned int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return false; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf, nbytes); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + return true; +} + #define warn_unseeded_randomness(previous) \ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) -- GitLab From 30c52e99db0d87ffe7e64b367d38446ecde0e3f6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:28:33 +0100 Subject: [PATCH 0198/4335] random: remove useless header comment commit 6071a6c0fba2d747742cadcbb3ba26ed756ed73b upstream. This really adds nothing at all useful. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- include/linux/random.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/random.h b/include/linux/random.h index e92efb39779cc..37e1e8c43d7ee 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/linux/random.h - * - * Include file for the random number generator. - */ + #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H -- GitLab From d3a2510216b40ba004907e8a51dd29a81657744f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 13:41:41 +0100 Subject: [PATCH 0199/4335] random: remove whitespace and reorder includes commit 87e7d5abad0cbc9312dea7f889a57d294c1a5fcc upstream. This is purely cosmetic. Future work involves figuring out which of these headers we need and which we don't. Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d7ae07da8878d..007eee1fcb8f0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -193,11 +193,10 @@ #include #include #include +#include #include #include - #include -#include #include #include #include -- GitLab From 81e62ff1f6503d99bf283c6421cf4395c69dc684 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0200/4335] random: group initialization wait functions commit 5f1bb112006b104b3e2a1e1b39bbb9b2617581e6 upstream. This pulls all of the readiness waiting-focused functions into the first labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 333 ++++++++++++++++++++++-------------------- 1 file changed, 172 insertions(+), 161 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 007eee1fcb8f0..c68a3e33ef2ce 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -201,44 +201,197 @@ #include #include -enum { - POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static struct fasync_struct *fasync; - -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); +/********************************************************************* + * + * Initialization and readiness waiting. + * + * Much of the RNG infrastructure is devoted to various dependencies + * being able to wait until the RNG has collected enough entropy and + * is ready for safe consumption. + * + *********************************************************************/ /* * crng_init = 0 --> Uninitialized * 1 --> Initialized * 2 --> Initialized from input_pool * - * crng_init is protected by primary_crng->lock, and only increases + * crng_init is protected by base_crng->lock, and only increases * its value (from 0->1->2). */ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) -static int crng_init_cnt = 0; -static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, size_t nbytes); +/* Various types of waiters for crng_init->2 transition. */ +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +static struct fasync_struct *fasync; +static DEFINE_SPINLOCK(random_ready_list_lock); +static LIST_HEAD(random_ready_list); +/* Control how we warn userspace. */ static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); - static int ratelimit_disable __read_mostly; - module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); +/* + * Returns whether or not the input pool has been seeded and thus guaranteed + * to supply cryptographically secure random numbers. This applies to: the + * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, + * ,u64,int,long} family of functions. + * + * Returns: true if the input pool has been seeded. + * false if the input pool has not been seeded. + */ +bool rng_is_initialized(void) +{ + return crng_ready(); +} +EXPORT_SYMBOL(rng_is_initialized); + +/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ +static void try_to_generate_entropy(void); + +/* + * Wait for the input pool to be seeded and thus guaranteed to supply + * cryptographically secure random numbers. This applies to: the /dev/urandom + * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} + * family of functions. Using any of these functions without first calling + * this function forfeits the guarantee of security. + * + * Returns: 0 if the input pool has been seeded. + * -ERESTARTSYS if the function was interrupted by a signal. + */ +int wait_for_random_bytes(void) +{ + if (likely(crng_ready())) + return 0; + + do { + int ret; + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + + try_to_generate_entropy(); + } while (!crng_ready()); + + return 0; +} +EXPORT_SYMBOL(wait_for_random_bytes); + +/* + * Add a callback function that will be invoked when the input + * pool is initialised. + * + * returns: 0 if callback is successfully added + * -EALREADY if pool is already initialised (callback not called) + * -ENOENT if module for callback is not alive + */ +int add_random_ready_callback(struct random_ready_callback *rdy) +{ + struct module *owner; + unsigned long flags; + int err = -EALREADY; + + if (crng_ready()) + return err; + + owner = rdy->owner; + if (!try_module_get(owner)) + return -ENOENT; + + spin_lock_irqsave(&random_ready_list_lock, flags); + if (crng_ready()) + goto out; + + owner = NULL; + + list_add(&rdy->list, &random_ready_list); + err = 0; + +out: + spin_unlock_irqrestore(&random_ready_list_lock, flags); + + module_put(owner); + + return err; +} +EXPORT_SYMBOL(add_random_ready_callback); + +/* + * Delete a previously registered readiness callback function. + */ +void del_random_ready_callback(struct random_ready_callback *rdy) +{ + unsigned long flags; + struct module *owner = NULL; + + spin_lock_irqsave(&random_ready_list_lock, flags); + if (!list_empty(&rdy->list)) { + list_del_init(&rdy->list); + owner = rdy->owner; + } + spin_unlock_irqrestore(&random_ready_list_lock, flags); + + module_put(owner); +} +EXPORT_SYMBOL(del_random_ready_callback); + +static void process_random_ready_list(void) +{ + unsigned long flags; + struct random_ready_callback *rdy, *tmp; + + spin_lock_irqsave(&random_ready_list_lock, flags); + list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { + struct module *owner = rdy->owner; + + list_del_init(&rdy->list); + rdy->func(rdy); + module_put(owner); + } + spin_unlock_irqrestore(&random_ready_list_lock, flags); +} + +#define warn_unseeded_randomness(previous) \ + _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) + +static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) +{ +#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM + const bool print_once = false; +#else + static bool print_once __read_mostly; +#endif + + if (print_once || crng_ready() || + (previous && (caller == READ_ONCE(*previous)))) + return; + WRITE_ONCE(*previous, caller); +#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM + print_once = true; +#endif + if (__ratelimit(&unseeded_warning)) + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); +} + + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ +}; + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); + +static int crng_init_cnt = 0; + /********************************************************************** * * OS independent entropy store. Here are the functions which handle @@ -322,22 +475,6 @@ static void fast_mix(u32 pool[4]) pool[2] = c; pool[3] = d; } -static void process_random_ready_list(void) -{ - unsigned long flags; - struct random_ready_callback *rdy, *tmp; - - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; - - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); -} - static void credit_entropy_bits(size_t nbits) { unsigned int entropy_count, orig, add; @@ -387,8 +524,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -909,29 +1044,6 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) - -static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) -{ -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; -#endif - - if (print_once || crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) - return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); -} - /* * This function is the exported kernel interface. It returns some * number of good random numbers, suitable for key generation, seeding @@ -1032,107 +1144,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -/* - * Wait for the urandom pool to be seeded and thus guaranteed to supply - * cryptographically secure random numbers. This applies to: the /dev/urandom - * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} - * family of functions. Using any of these functions without first calling - * this function forfeits the guarantee of security. - * - * Returns: 0 if the urandom pool has been seeded. - * -ERESTARTSYS if the function was interrupted by a signal. - */ -int wait_for_random_bytes(void) -{ - if (likely(crng_ready())) - return 0; - - do { - int ret; - ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); - if (ret) - return ret > 0 ? 0 : ret; - - try_to_generate_entropy(); - } while (!crng_ready()); - - return 0; -} -EXPORT_SYMBOL(wait_for_random_bytes); - -/* - * Returns whether or not the urandom pool has been seeded and thus guaranteed - * to supply cryptographically secure random numbers. This applies to: the - * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, - * ,u64,int,long} family of functions. - * - * Returns: true if the urandom pool has been seeded. - * false if the urandom pool has not been seeded. - */ -bool rng_is_initialized(void) -{ - return crng_ready(); -} -EXPORT_SYMBOL(rng_is_initialized); - -/* - * Add a callback function that will be invoked when the nonblocking - * pool is initialised. - * - * returns: 0 if callback is successfully added - * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive - */ -int add_random_ready_callback(struct random_ready_callback *rdy) -{ - struct module *owner; - unsigned long flags; - int err = -EALREADY; - - if (crng_ready()) - return err; - - owner = rdy->owner; - if (!try_module_get(owner)) - return -ENOENT; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (crng_ready()) - goto out; - - owner = NULL; - - list_add(&rdy->list, &random_ready_list); - err = 0; - -out: - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); - - return err; -} -EXPORT_SYMBOL(add_random_ready_callback); - -/* - * Delete a previously registered readiness callback function. - */ -void del_random_ready_callback(struct random_ready_callback *rdy) -{ - unsigned long flags; - struct module *owner = NULL; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (!list_empty(&rdy->list)) { - list_del_init(&rdy->list); - owner = rdy->owner; - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); -} -EXPORT_SYMBOL(del_random_ready_callback); - /* * This function will use the architecture-specific hardware random * number generator if it is available. It is not recommended for -- GitLab From 019e057db9b1505cf7112129d657635bfe22f188 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0201/4335] random: group crng functions commit 3655adc7089da4f8ca74cec8fcef73ea5101430e upstream. This pulls all of the crng-focused functions into the second labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 862 ++++++++++++++++++++++-------------------- 1 file changed, 445 insertions(+), 417 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c68a3e33ef2ce..202252596ccb3 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -380,122 +380,27 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void } -enum { - POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); - -static int crng_init_cnt = 0; - -/********************************************************************** +/********************************************************************* * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. + * Fast key erasure RNG, the "crng". * - **********************************************************************/ - -static struct { - struct blake2s_state hash; - spinlock_t lock; - unsigned int entropy_count; -} input_pool = { - .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), - BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, - BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, - .hash.outlen = BLAKE2S_HASH_SIZE, - .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), -}; - -static void extract_entropy(void *buf, size_t nbytes); -static bool drain_entropy(void *buf, size_t nbytes); - -static void crng_reseed(void); - -/* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. - */ -static void _mix_pool_bytes(const void *in, size_t nbytes) -{ - blake2s_update(&input_pool.hash, in, nbytes); -} - -static void mix_pool_bytes(const void *in, size_t nbytes) -{ - unsigned long flags; - - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); - spin_unlock_irqrestore(&input_pool.lock, flags); -} - -struct fast_pool { - union { - u32 pool32[4]; - u64 pool64[2]; - }; - unsigned long last; - u16 reg_idx; - u8 count; -}; - -/* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. - */ -static void fast_mix(u32 pool[4]) -{ - u32 a = pool[0], b = pool[1]; - u32 c = pool[2], d = pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - pool[0] = a; pool[1] = b; - pool[2] = c; pool[3] = d; -} - -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(); -} - -/********************************************************************* + * These functions expand entropy from the entropy extractor into + * long streams for external consumption using the "fast key erasure" + * RNG described at . * - * CRNG using CHACHA20 + * There are a few exported interfaces for use by other drivers: + * + * void get_random_bytes(void *buf, size_t nbytes) + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() + * + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to + * a read from /dev/urandom. The integer family of functions may be + * higher performance for one-off random integers, because they do a + * bit of buffering. * *********************************************************************/ @@ -524,70 +429,14 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -/* - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. Returns the number of - * bytes processed from cp. - */ -static size_t crng_fast_load(const void *cp, size_t len) -{ - unsigned long flags; - const u8 *src = (const u8 *)cp; - size_t ret = 0; - - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; - } - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; - src++; crng_init_cnt++; len--; ret++; - } - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - spin_unlock_irqrestore(&base_crng.lock, flags); - if (crng_init == 1) - pr_notice("fast init done\n"); - return ret; -} +/* Used by crng_reseed() to extract a new seed from the input pool. */ +static bool drain_entropy(void *buf, size_t nbytes); /* - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So, we simply hash the contents in with the current key. Finally, - * we do *not* advance crng_init_cnt since buffer we may get may be - * something like a fixed DMI table (for example), which might very - * well be unique to the machine, but is otherwise unvarying. + * This extracts a new crng key from the input pool, but only if there is a + * sufficient amount of entropy available, in order to mitigate bruteforcing + * of newly added bits. */ -static void crng_slow_load(const void *cp, size_t len) -{ - unsigned long flags; - struct blake2s_state hash; - - blake2s_init(&hash, sizeof(base_crng.key)); - - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; - } - - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, cp, len); - blake2s_final(&hash, base_crng.key); - - spin_unlock_irqrestore(&base_crng.lock, flags); -} - static void crng_reseed(void) { unsigned long flags; @@ -637,13 +486,11 @@ static void crng_reseed(void) } /* - * The general form here is based on a "fast key erasure RNG" from - * . It generates a ChaCha - * block using the provided key, and then immediately overwites that - * key with half the block. It returns the resultant ChaCha state to the - * user, along with the second half of the block containing 32 bytes of - * random data that may be used; random_data_len may not be greater than - * 32. + * This generates a ChaCha block using the provided key, and then + * immediately overwites that key with half the block. It returns + * the resultant ChaCha state to the user, along with the second + * half of the block containing 32 bytes of random data that may + * be used; random_data_len may not be greater than 32. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], @@ -730,51 +577,433 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], local_unlock_irqrestore(&crngs.lock, flags); } -static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +/* + * This function is for crng_init == 0 only. + * + * crng_fast_load() can be called by code in the interrupt service + * path. So we can't afford to dilly-dally. Returns the number of + * bytes processed from cp. + */ +static size_t crng_fast_load(const void *cp, size_t len) +{ + static int crng_init_cnt = 0; + unsigned long flags; + const u8 *src = (const u8 *)cp; + size_t ret = 0; + + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return 0; + if (crng_init != 0) { + spin_unlock_irqrestore(&base_crng.lock, flags); + return 0; + } + while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { + base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; + src++; crng_init_cnt++; len--; ret++; + } + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + ++base_crng.generation; + crng_init = 1; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + if (crng_init == 1) + pr_notice("fast init done\n"); + return ret; +} + +/* + * This function is for crng_init == 0 only. + * + * crng_slow_load() is called by add_device_randomness, which has two + * attributes. (1) We can't trust the buffer passed to it is + * guaranteed to be unpredictable (so it might not have any entropy at + * all), and (2) it doesn't have the performance constraints of + * crng_fast_load(). + * + * So, we simply hash the contents in with the current key. Finally, + * we do *not* advance crng_init_cnt since buffer we may get may be + * something like a fixed DMI table (for example), which might very + * well be unique to the machine, but is otherwise unvarying. + */ +static void crng_slow_load(const void *cp, size_t len) +{ + unsigned long flags; + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); + + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return; + if (crng_init != 0) { + spin_unlock_irqrestore(&base_crng.lock, flags); + return; + } + + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, cp, len); + blake2s_final(&hash, base_crng.key); + + spin_unlock_irqrestore(&base_crng.lock, flags); +} + +static void _get_random_bytes(void *buf, size_t nbytes) { - bool large_request = nbytes > 256; - ssize_t ret = 0; - size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; - u8 output[CHACHA_BLOCK_SIZE]; + u8 tmp[CHACHA_BLOCK_SIZE]; + size_t len; + + if (!nbytes) + return; + + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, buf, len); + nbytes -= len; + buf += len; + + while (nbytes) { + if (nbytes < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + nbytes -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); +} + +/* + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not rely on the hardware random + * number generator. For random bytes direct from the hardware RNG + * (when available), use get_random_bytes_arch(). In order to ensure + * that the randomness provided by this function is okay, the function + * wait_for_random_bytes() should be called and return 0 at least once + * at any point prior. + */ +void get_random_bytes(void *buf, size_t nbytes) +{ + static void *previous; + + warn_unseeded_randomness(&previous); + _get_random_bytes(buf, nbytes); +} +EXPORT_SYMBOL(get_random_bytes); + +static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +{ + bool large_request = nbytes > 256; + ssize_t ret = 0; + size_t len; + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 output[CHACHA_BLOCK_SIZE]; + + if (!nbytes) + return 0; + + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, output, len); + + if (copy_to_user(buf, output, len)) + return -EFAULT; + nbytes -= len; + buf += len; + ret += len; + + while (nbytes) { + if (large_request && need_resched()) { + if (signal_pending(current)) + break; + schedule(); + } + + chacha20_block(chacha_state, output); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + + len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); + if (copy_to_user(buf, output, len)) { + ret = -EFAULT; + break; + } + + nbytes -= len; + buf += len; + ret += len; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); + memzero_explicit(output, sizeof(output)); + return ret; +} + +/* + * Batched entropy returns random integers. The quality of the random + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. + */ +struct batched_entropy { + union { + /* + * We make this 1.5x a ChaCha block, so that we get the + * remaining 32 bytes from fast key erasure, plus one full + * block from the detached ChaCha state. We can increase + * the size of this later if needed so long as we keep the + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. + */ + u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; + u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; + }; + local_lock_t lock; + unsigned long generation; + unsigned int position; +}; + + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), + .position = UINT_MAX +}; + +u64 get_random_u64(void) +{ + u64 ret; + unsigned long flags; + struct batched_entropy *batch; + static void *previous; + unsigned long next_gen; + + warn_unseeded_randomness(&previous); + + local_lock_irqsave(&batched_entropy_u64.lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u64); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); + batch->position = 0; + batch->generation = next_gen; + } + + ret = batch->entropy_u64[batch->position]; + batch->entropy_u64[batch->position] = 0; + ++batch->position; + local_unlock_irqrestore(&batched_entropy_u64.lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u64); + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), + .position = UINT_MAX +}; + +u32 get_random_u32(void) +{ + u32 ret; + unsigned long flags; + struct batched_entropy *batch; + static void *previous; + unsigned long next_gen; + + warn_unseeded_randomness(&previous); + + local_lock_irqsave(&batched_entropy_u32.lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u32); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); + batch->position = 0; + batch->generation = next_gen; + } + + ret = batch->entropy_u32[batch->position]; + batch->entropy_u32[batch->position] = 0; + ++batch->position; + local_unlock_irqrestore(&batched_entropy_u32.lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u32); + +/** + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} + +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. + */ +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +{ + size_t left = nbytes; + u8 *p = buf; + + while (left) { + unsigned long v; + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, chunk); + p += chunk; + left -= chunk; + } + + return nbytes - left; +} +EXPORT_SYMBOL(get_random_bytes_arch); + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ +}; + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); + +/********************************************************************** + * + * OS independent entropy store. Here are the functions which handle + * storing entropy in an entropy pool. + * + **********************************************************************/ + +static struct { + struct blake2s_state hash; + spinlock_t lock; + unsigned int entropy_count; +} input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), +}; + +static void extract_entropy(void *buf, size_t nbytes); +static bool drain_entropy(void *buf, size_t nbytes); + +static void crng_reseed(void); + +/* + * This function adds bytes into the entropy "pool". It does not + * update the entropy estimate. The caller should call + * credit_entropy_bits if this is appropriate. + */ +static void _mix_pool_bytes(const void *in, size_t nbytes) +{ + blake2s_update(&input_pool.hash, in, nbytes); +} + +static void mix_pool_bytes(const void *in, size_t nbytes) +{ + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); +} + +struct fast_pool { + union { + u32 pool32[4]; + u64 pool64[2]; + }; + unsigned long last; + u16 reg_idx; + u8 count; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. + */ +static void fast_mix(u32 pool[4]) +{ + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; - if (!nbytes) - return 0; + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) - break; - schedule(); - } + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; +} - chacha20_block(chacha_state, output); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; - len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; - break; - } + if (!nbits) + return; - nbytes -= len; - buf += len; - ret += len; - } + add = min_t(size_t, nbits, POOL_BITS); - memzero_explicit(chacha_state, sizeof(chacha_state)); - memzero_explicit(output, sizeof(output)); - return ret; + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) + crng_reseed(); } /********************************************************************* @@ -1044,57 +1273,6 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -/* - * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for key generation, seeding - * TCP sequence numbers, etc. It does not rely on the hardware random - * number generator. For random bytes direct from the hardware RNG - * (when available), use get_random_bytes_arch(). In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. - */ -static void _get_random_bytes(void *buf, size_t nbytes) -{ - u32 chacha_state[CHACHA_STATE_WORDS]; - u8 tmp[CHACHA_BLOCK_SIZE]; - size_t len; - - if (!nbytes) - return; - - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, buf, len); - nbytes -= len; - buf += len; - - while (nbytes) { - if (nbytes < CHACHA_BLOCK_SIZE) { - chacha20_block(chacha_state, tmp); - memcpy(buf, tmp, nbytes); - memzero_explicit(tmp, sizeof(tmp)); - break; - } - - chacha20_block(chacha_state, buf); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; - nbytes -= CHACHA_BLOCK_SIZE; - buf += CHACHA_BLOCK_SIZE; - } - - memzero_explicit(chacha_state, sizeof(chacha_state)); -} - -void get_random_bytes(void *buf, size_t nbytes) -{ - static void *previous; - - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); -} -EXPORT_SYMBOL(get_random_bytes); - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1144,33 +1322,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -/* - * This function will use the architecture-specific hardware random - * number generator if it is available. It is not recommended for - * use. Use get_random_bytes() instead. It returns the number of - * bytes filled in. - */ -size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) -{ - size_t left = nbytes; - u8 *p = buf; - - while (left) { - unsigned long v; - size_t chunk = min_t(size_t, left, sizeof(unsigned long)); - - if (!arch_get_random_long(&v)) - break; - - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; - } - - return nbytes - left; -} -EXPORT_SYMBOL(get_random_bytes_arch); - static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); static int __init parse_trust_cpu(char *arg) { @@ -1523,129 +1674,6 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -struct batched_entropy { - union { - /* - * We make this 1.5x a ChaCha block, so that we get the - * remaining 32 bytes from fast key erasure, plus one full - * block from the detached ChaCha state. We can increase - * the size of this later if needed so long as we keep the - * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. - */ - u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; - u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; - }; - local_lock_t lock; - unsigned long generation; - unsigned int position; -}; - -/* - * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom. In order to ensure that the randomness - * provided by this function is okay, the function wait_for_random_bytes() - * should be called and return 0 at least once at any point prior. - */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), - .position = UINT_MAX -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u64.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u64); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u64[batch->position]; - batch->entropy_u64[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u64.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), - .position = UINT_MAX -}; - -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u32.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u32); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u32[batch->position]; - batch->entropy_u32[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u32.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); - -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - /* Interface for in-kernel drivers of true hardware RNGs. * Those devices may produce endless random bits and will be throttled * when our pool is full. -- GitLab From 43e87e6f3317ec9e0f4886a2dd4cb442d5a1c1d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0202/4335] random: group entropy extraction functions commit a5ed7cb1a7732ef11959332d507889fbc39ebbb4 upstream. This pulls all of the entropy extraction-focused functions into the third labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 216 +++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 107 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 202252596ccb3..170f16ac7f751 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -895,23 +895,36 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) } EXPORT_SYMBOL(get_random_bytes_arch); + +/********************************************************************** + * + * Entropy accumulation and extraction routines. + * + * Callers may add entropy via: + * + * static void mix_pool_bytes(const void *in, size_t nbytes) + * + * After which, if added entropy should be credited: + * + * static void credit_entropy_bits(size_t nbits) + * + * Finally, extract entropy via these two, with the latter one + * setting the entropy count to zero and extracting only if there + * is POOL_MIN_BITS entropy credited prior: + * + * static void extract_entropy(void *buf, size_t nbytes) + * static bool drain_entropy(void *buf, size_t nbytes) + * + **********************************************************************/ + enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ }; -/* - * Static global variables - */ +/* For notifying userspace should write into /dev/random. */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -/********************************************************************** - * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. - * - **********************************************************************/ - static struct { struct blake2s_state hash; spinlock_t lock; @@ -924,28 +937,106 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static void extract_entropy(void *buf, size_t nbytes); -static bool drain_entropy(void *buf, size_t nbytes); - -static void crng_reseed(void); +static void _mix_pool_bytes(const void *in, size_t nbytes) +{ + blake2s_update(&input_pool.hash, in, nbytes); +} /* * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. */ -static void _mix_pool_bytes(const void *in, size_t nbytes) +static void mix_pool_bytes(const void *in, size_t nbytes) { - blake2s_update(&input_pool.hash, in, nbytes); + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); } -static void mix_pool_bytes(const void *in, size_t nbytes) +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; + + if (!nbits) + return; + + add = min_t(size_t, nbits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) + crng_reseed(); +} + +/* + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. + */ +static void extract_entropy(void *buf, size_t nbytes) { unsigned long flags; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdseed[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); + } spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); + + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); + + /* next_key = HASHPRF(seed, RDSEED || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); + + while (nbytes) { + i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDSEED || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); + nbytes -= i; + buf += i; + } + + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); +} + +/* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we + * set the entropy count to zero (but don't actually touch any data). Only then + * can we extract a new key with extract_entropy(). + */ +static bool drain_entropy(void *buf, size_t nbytes) +{ + unsigned int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return false; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf, nbytes); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + return true; } struct fast_pool { @@ -988,24 +1079,6 @@ static void fast_mix(u32 pool[4]) pool[2] = c; pool[3] = d; } -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(); -} - /********************************************************************* * * Entropy input management @@ -1202,77 +1275,6 @@ void add_disk_randomness(struct gendisk *disk) EXPORT_SYMBOL_GPL(add_disk_randomness); #endif -/********************************************************************* - * - * Entropy extraction routines - * - *********************************************************************/ - -/* - * This is an HKDF-like construction for using the hashed collected entropy - * as a PRF key, that's then expanded block-by-block. - */ -static void extract_entropy(void *buf, size_t nbytes) -{ - unsigned long flags; - u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; - struct { - unsigned long rdseed[32 / sizeof(long)]; - size_t counter; - } block; - size_t i; - - for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { - if (!arch_get_random_seed_long(&block.rdseed[i]) && - !arch_get_random_long(&block.rdseed[i])) - block.rdseed[i] = random_get_entropy(); - } - - spin_lock_irqsave(&input_pool.lock, flags); - - /* seed = HASHPRF(last_key, entropy_input) */ - blake2s_final(&input_pool.hash, seed); - - /* next_key = HASHPRF(seed, RDSEED || 0) */ - block.counter = 0; - blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); - blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); - - spin_unlock_irqrestore(&input_pool.lock, flags); - memzero_explicit(next_key, sizeof(next_key)); - - while (nbytes) { - i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); - /* output = HASHPRF(seed, RDSEED || ++counter) */ - ++block.counter; - blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); - nbytes -= i; - buf += i; - } - - memzero_explicit(seed, sizeof(seed)); - memzero_explicit(&block, sizeof(block)); -} - -/* - * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we - * set the entropy count to zero (but don't actually touch any data). Only then - * can we extract a new key with extract_entropy(). - */ -static bool drain_entropy(void *buf, size_t nbytes) -{ - unsigned int entropy_count; - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return false; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf, nbytes); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - return true; -} - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another -- GitLab From 74b704245f3122e64543fae6b015b82d01e206a4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0203/4335] random: group entropy collection functions commit 92c653cf14400946f376a29b828d6af7e01f38dd upstream. This pulls all of the entropy collection-focused functions into the fourth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 370 +++++++++++++++++++++++------------------- 1 file changed, 206 insertions(+), 164 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 170f16ac7f751..e9857a4b76ec1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1039,60 +1039,112 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -struct fast_pool { - union { - u32 pool32[4]; - u64 pool64[2]; - }; - unsigned long last; - u16 reg_idx; - u8 count; -}; + +/********************************************************************** + * + * Entropy collection routines. + * + * The following exported functions are used for pushing entropy into + * the above entropy accumulation routines: + * + * void add_device_randomness(const void *buf, size_t size); + * void add_input_randomness(unsigned int type, unsigned int code, + * unsigned int value); + * void add_disk_randomness(struct gendisk *disk); + * void add_hwgenerator_randomness(const void *buffer, size_t count, + * size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t size); + * void add_interrupt_randomness(int irq); + * + * add_device_randomness() adds data to the input pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* credit any actual entropy to + * the pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The above two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or + * add_device_randomness(), depending on whether or not the configuration + * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the input pool roughly once a second or after 64 + * interrupts, crediting 1 bit of entropy for whichever comes first. + * + **********************************************************************/ + +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); /* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. + * The first collection of entropy occurs at system boot while interrupts + * are still turned off. Here we push in RDSEED, a timestamp, and utsname(). + * Depending on the above configuration knob, RDSEED may be considered + * sufficient for initialization. Note that much earlier setup may already + * have pushed entropy into the input pool by the time we get here. */ -static void fast_mix(u32 pool[4]) +int __init rand_initialize(void) { - u32 a = pool[0], b = pool[1]; - u32 c = pool[2], d = pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + size_t i; + ktime_t now = ktime_get_real(); + bool arch_init = true; + unsigned long rv; - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { + if (!arch_get_random_seed_long_early(&rv) && + !arch_get_random_long_early(&rv)) { + rv = random_get_entropy(); + arch_init = false; + } + mix_pool_bytes(&rv, sizeof(rv)); + } + mix_pool_bytes(&now, sizeof(now)); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + extract_entropy(base_crng.key, sizeof(base_crng.key)); + ++base_crng.generation; - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + if (arch_init && trust_cpu && crng_init < 2) { + crng_init = 2; + pr_notice("crng init done (trusting CPU's manufacturer)\n"); + } - pool[0] = a; pool[1] = b; - pool[2] = c; pool[3] = d; + if (ratelimit_disable) { + urandom_warning.interval = 0; + unseeded_warning.interval = 0; + } + return 0; } -/********************************************************************* - * - * Entropy input management - * - *********************************************************************/ - /* There is one of these per entropy source */ struct timer_rand_state { cycles_t last_time; long last_delta, last_delta2; }; -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1116,8 +1168,6 @@ void add_device_randomness(const void *buf, size_t size) } EXPORT_SYMBOL(add_device_randomness); -static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; - /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate @@ -1179,8 +1229,9 @@ void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; - /* ignore autorepeat and the like */ + /* Ignore autorepeat and the like. */ if (value == last_value) return; @@ -1190,6 +1241,119 @@ void add_input_randomness(unsigned int type, unsigned int code, } EXPORT_SYMBOL_GPL(add_input_randomness); +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) +{ + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. */ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); +} +EXPORT_SYMBOL_GPL(add_disk_randomness); + +void rand_initialize_disk(struct gendisk *disk) +{ + struct timer_rand_state *state; + + /* + * If kzalloc returns null, we just won't use that entropy + * source. + */ + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; + } +} +#endif + +/* + * Interface for in-kernel drivers of true hardware RNGs. + * Those devices may produce endless random bits and will be throttled + * when our pool is full. + */ +void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy) +{ + if (unlikely(crng_init == 0)) { + size_t ret = crng_fast_load(buffer, count); + mix_pool_bytes(buffer, ret); + count -= ret; + buffer += ret; + if (!count || crng_init == 0) + return; + } + + /* + * Throttle writing if we're above the trickle threshold. + * We'll be woken up again once below POOL_MIN_BITS, when + * the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has elapsed. + */ + wait_event_interruptible_timeout(random_write_wait, + !system_wq || kthread_should_stop() || + input_pool.entropy_count < POOL_MIN_BITS, + CRNG_RESEED_INTERVAL); + mix_pool_bytes(buffer, count); + credit_entropy_bits(entropy); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* + * Handle random seed passed by bootloader. + * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise + * it would be regarded as device data. + * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + */ +void add_bootloader_randomness(const void *buf, size_t size) +{ + if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + add_hwgenerator_randomness(buf, size, size * 8); + else + add_device_randomness(buf, size); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +struct fast_pool { + union { + u32 pool32[4]; + u64 pool64[2]; + }; + unsigned long last; + u16 reg_idx; + u8 count; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. + */ +static void fast_mix(u32 pool[4]) +{ + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; + + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; +} + static DEFINE_PER_CPU(struct fast_pool, irq_randomness); static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) @@ -1259,22 +1423,11 @@ void add_interrupt_randomness(int irq) fast_pool->count = 0; - /* award one bit for the contents of the fast pool */ + /* Award one bit for the contents of the fast pool. */ credit_entropy_bits(1); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* first major is 1, so we get >= 0x200 here */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); -} -EXPORT_SYMBOL_GPL(add_disk_randomness); -#endif - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1324,73 +1477,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. - */ -int __init rand_initialize(void) -{ - size_t i; - ktime_t now = ktime_get_real(); - bool arch_init = true; - unsigned long rv; - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - mix_pool_bytes(&rv, sizeof(rv)); - } - mix_pool_bytes(&now, sizeof(now)); - mix_pool_bytes(utsname(), sizeof(*(utsname()))); - - extract_entropy(base_crng.key, sizeof(base_crng.key)); - ++base_crng.generation; - - if (arch_init && trust_cpu && crng_init < 2) { - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } - - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } - return 0; -} - -#ifdef CONFIG_BLOCK -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1675,47 +1761,3 @@ struct ctl_table random_table[] = { { } }; #endif /* CONFIG_SYSCTL */ - -/* Interface for in-kernel drivers of true hardware RNGs. - * Those devices may produce endless random bits and will be throttled - * when our pool is full. - */ -void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy) -{ - if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); - mix_pool_bytes(buffer, ret); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; - } - - /* Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below POOL_MIN_BITS, when - * the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has elapsed. - */ - wait_event_interruptible_timeout(random_write_wait, - !system_wq || kthread_should_stop() || - input_pool.entropy_count < POOL_MIN_BITS, - CRNG_RESEED_INTERVAL); - mix_pool_bytes(buffer, count); - credit_entropy_bits(entropy); -} -EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); - -/* Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. - */ -void add_bootloader_randomness(const void *buf, size_t size) -{ - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); -} -EXPORT_SYMBOL_GPL(add_bootloader_randomness); -- GitLab From 96340f035df66b0795edba0d027a98e2dd1b0353 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0204/4335] random: group userspace read/write functions commit a6adf8e7a605250b911e94793fd077933709ff9e upstream. This pulls all of the userspace read/write-focused functions into the fifth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 125 ++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e9857a4b76ec1..7d005f7ebcf5c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1477,30 +1477,61 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) + +/********************************************************************** + * + * Userspace reader/writer interfaces. + * + * getrandom(2) is the primary modern interface into the RNG and should + * be used in preference to anything else. + * + * Reading from /dev/random has the same functionality as calling + * getrandom(2) with flags=0. In earlier versions, however, it had + * vastly different semantics and should therefore be avoided, to + * prevent backwards compatibility issues. + * + * Reading from /dev/urandom has the same functionality as calling + * getrandom(2) with flags=GRND_INSECURE. Because it does not block + * waiting for the RNG to be ready, it should not be used. + * + * Writing to either /dev/random or /dev/urandom adds entropy to + * the input pool but does not credit it. + * + * Polling on /dev/random indicates when the RNG is initialized, on + * the read side, and when it wants new entropy, on the write side. + * + * Both /dev/random and /dev/urandom have the same set of ioctls for + * adding entropy, getting the entropy count, zeroing the count, and + * reseeding the crng. + * + **********************************************************************/ + +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, + flags) { - static int maxwarn = 10; + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) + return -EINVAL; - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); - } + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. + */ + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) + return -EINVAL; - return get_random_bytes_user(buf, nbytes); -} + if (count > INT_MAX) + count = INT_MAX; -static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - int ret; + if (!(flags & GRND_INSECURE) && !crng_ready()) { + int ret; - ret = wait_for_random_bytes(); - if (ret != 0) - return ret; - return get_random_bytes_user(buf, nbytes); + if (flags & GRND_NONBLOCK) + return -EAGAIN; + ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + } + return get_random_bytes_user(buf, count); } static __poll_t random_poll(struct file *file, poll_table *wait) @@ -1552,6 +1583,32 @@ static ssize_t random_write(struct file *file, const char __user *buffer, return (ssize_t)count; } +static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + static int maxwarn = 10; + + if (!crng_ready() && maxwarn > 0) { + maxwarn--; + if (__ratelimit(&urandom_warning)) + pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", + current->comm, nbytes); + } + + return get_random_bytes_user(buf, nbytes); +} + +static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + int ret; + + ret = wait_for_random_bytes(); + if (ret != 0) + return ret; + return get_random_bytes_user(buf, nbytes); +} + static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int size, ent_count; @@ -1560,7 +1617,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: - /* inherently racy, no point locking */ + /* Inherently racy, no point locking. */ if (put_user(input_pool.entropy_count, p)) return -EFAULT; return 0; @@ -1636,34 +1693,6 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, - flags) -{ - if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) - return -EINVAL; - - /* - * Requesting insecure and blocking randomness at the same time makes - * no sense. - */ - if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) - return -EINVAL; - - if (count > INT_MAX) - count = INT_MAX; - - if (!(flags & GRND_INSECURE) && !crng_ready()) { - int ret; - - if (flags & GRND_NONBLOCK) - return -EAGAIN; - ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; - } - return get_random_bytes_user(buf, count); -} - /******************************************************************** * * Sysctl interface -- GitLab From 9385681af846fc7b64078b5ff378f9c68492e23b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0205/4335] random: group sysctl functions commit 0deff3c43206c24e746b1410f11125707ad3040e upstream. This pulls all of the sysctl-focused functions into the sixth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 7d005f7ebcf5c..9a23983f3dace 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1693,9 +1693,34 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; + /******************************************************************** * - * Sysctl interface + * Sysctl interface. + * + * These are partly unused legacy knobs with dummy values to not break + * userspace and partly still useful things. They are usually accessible + * in /proc/sys/kernel/random/ and are as follows: + * + * - boot_id - a UUID representing the current boot. + * + * - uuid - a random UUID, different each time the file is read. + * + * - poolsize - the number of bits of entropy that the input pool can + * hold, tied to the POOL_BITS constant. + * + * - entropy_avail - the number of bits of entropy currently in the + * input pool. Always <= poolsize. + * + * - write_wakeup_threshold - the amount of entropy in the input pool + * below which write polls to /dev/random will unblock, requesting + * more entropy, tied to the POOL_MIN_BITS constant. It is writable + * to avoid breaking old userspaces, but writing to it does not + * change any behavior of the RNG. + * + * - urandom_min_reseed_secs - fixed to the meaningless value "60". + * It is writable to avoid breaking old userspaces, but writing + * to it does not change any behavior of the RNG. * ********************************************************************/ @@ -1703,8 +1728,8 @@ const struct file_operations urandom_fops = { #include -static int random_min_urandom_seed = 60; -static int random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_random_min_urandom_seed = 60; +static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; static int sysctl_poolsize = POOL_BITS; static char sysctl_bootid[16]; @@ -1762,14 +1787,14 @@ struct ctl_table random_table[] = { }, { .procname = "write_wakeup_threshold", - .data = &random_write_wakeup_bits, + .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "urandom_min_reseed_secs", - .data = &random_min_urandom_seed, + .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, -- GitLab From 9489e36e215049048d09f22fe29a0fda3db5163a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:29:33 +0100 Subject: [PATCH 0206/4335] random: rewrite header introductory comment commit 5f75d9f3babea8ae0a2d06724656874f41d317f5 upstream. Now that we've re-documented the various sections, we can remove the outdated text here and replace it with a high-level overview. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 183 +++++------------------------------------- 1 file changed, 21 insertions(+), 162 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9a23983f3dace..2b462d9a15637 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2,168 +2,27 @@ /* * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * Copyright Matt Mackall , 2003, 2004, 2005 - * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All - * rights reserved. - */ - -/* - * Exported interfaces ---- output - * =============================== - * - * There are four exported interfaces; two for use within the kernel, - * and two for use from userspace. - * - * Exported interfaces ---- userspace output - * ----------------------------------------- - * - * The userspace interfaces are two character devices /dev/random and - * /dev/urandom. /dev/random is suitable for use when very high - * quality randomness is desired (for example, for key generation or - * one-time pads), as it will only return a maximum of the number of - * bits of randomness (as estimated by the random number generator) - * contained in the entropy pool. - * - * The /dev/urandom device does not have this limit, and will return - * as many bytes as are requested. As more and more random bytes are - * requested without giving time for the entropy pool to recharge, - * this will result in random numbers that are merely cryptographically - * strong. For many applications, however, this is acceptable. - * - * Exported interfaces ---- kernel output - * -------------------------------------- - * - * The primary kernel interfaces are: - * - * void get_random_bytes(void *buf, size_t nbytes); - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() - * - * These interfaces will return the requested number of random bytes - * into the given buffer or as a return value. This is equivalent to a - * read from /dev/urandom. The get_random_{u32,u64,int,long}() family - * of functions may be higher performance for one-off random integers, - * because they do a bit of buffering. - * - * prandom_u32() - * ------------- - * - * For even weaker applications, see the pseudorandom generator - * prandom_u32(), prandom_max(), and prandom_bytes(). If the random - * numbers aren't security-critical at all, these are *far* cheaper. - * Useful for self-tests, random error simulation, randomized backoffs, - * and any other application where you trust that nobody is trying to - * maliciously mess with you by guessing the "random" numbers. - * - * Exported interfaces ---- input - * ============================== - * - * The current exported interfaces for gathering environmental noise - * from the devices are: - * - * void add_device_randomness(const void *buf, size_t size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_interrupt_randomness(int irq); - * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const void *buffer, size_t count, - * size_t entropy); - * void add_bootloader_randomness(const void *buf, size_t size); - * - * add_device_randomness() is for adding data to the random pool that - * is likely to differ between two devices (or possibly even per boot). - * This would be things like MAC addresses or serial numbers, or the - * read-out of the RTC. This does *not* add any actual entropy to the - * pool, but it initializes the pool to different values for devices - * that might otherwise be identical and have very little entropy - * available to them (particularly common in the embedded world). - * - * add_input_randomness() uses the input layer interrupt timing, as well as - * the event type information from the hardware. - * - * add_interrupt_randomness() uses the interrupt timing as random - * inputs to the entropy pool. Using the cycle counters and the irq source - * as inputs, it feeds the randomness roughly once a second. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * All of these routines try to estimate how many bits of randomness a - * particular randomness source. They do this by keeping track of the - * first and second order deltas of the event timings. - * - * add_hwgenerator_randomness() is for true hardware RNGs, and will credit - * entropy as specified by the caller. If the entropy pool is full it will - * block until more entropy is needed. - * - * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or - * add_device_randomness(), depending on whether or not the configuration - * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. - * - * Ensuring unpredictability at system startup - * ============================================ - * - * When any operating system starts up, it will go through a sequence - * of actions that are fairly predictable by an adversary, especially - * if the start-up does not involve interaction with a human operator. - * This reduces the actual number of bits of unpredictability in the - * entropy pool below the value in entropy_count. In order to - * counteract this effect, it helps to carry information in the - * entropy pool across shut-downs and start-ups. To do this, put the - * following lines an appropriate script which is run during the boot - * sequence: - * - * echo "Initializing random number generator..." - * random_seed=/var/run/random-seed - * # Carry a random seed from start-up to start-up - * # Load and then save the whole entropy pool - * if [ -f $random_seed ]; then - * cat $random_seed >/dev/urandom - * else - * touch $random_seed - * fi - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * and the following lines in an appropriate script which is run as - * the system is shutdown: - * - * # Carry a random seed from shut-down to start-up - * # Save the whole entropy pool - * echo "Saving random seed..." - * random_seed=/var/run/random-seed - * touch $random_seed - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * For example, on most modern systems using the System V init - * scripts, such code fragments would be found in - * /etc/rc.d/init.d/random. On older Linux systems, the correct script - * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0. - * - * Effectively, these commands cause the contents of the entropy pool - * to be saved at shut-down time and reloaded into the entropy pool at - * start-up. (The 'dd' in the addition to the bootup script is to - * make sure that /etc/random-seed is different for every start-up, - * even if the system crashes without executing rc.0.) Even with - * complete knowledge of the start-up activities, predicting the state - * of the entropy pool requires knowledge of the previous history of - * the system. - * - * Configuring the /dev/random driver under Linux - * ============================================== - * - * The /dev/random driver under Linux uses minor numbers 8 and 9 of - * the /dev/mem major number (#1). So if your system does not have - * /dev/random and /dev/urandom created already, they can be created - * by using the commands: - * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 + * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. + * + * This driver produces cryptographically secure pseudorandom data. It is divided + * into roughly six sections, each with a section header: + * + * - Initialization and readiness waiting. + * - Fast key erasure RNG, the "crng". + * - Entropy accumulation and extraction routines. + * - Entropy collection routines. + * - Userspace reader/writer interfaces. + * - Sysctl interface. + * + * The high level overview is that there is one input pool, into which + * various pieces of data are hashed. Some of that data is then "credited" as + * having a certain number of bits of entropy. When enough bits of entropy are + * available, the hash is finalized and handed as a key to a stream cipher that + * expands it indefinitely for various consumers. This key is periodically + * refreshed as the various entropy collectors, described below, add data to the + * input pool and credit it. There is currently no Fortuna-like scheduler + * involved, which can lead to malicious entropy sources causing a premature + * reseed, and the entropy estimates are, at best, conservative guesses. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -- GitLab From 3c48a2da32ef8ebfa8b71233b790f45d48704b2a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Feb 2022 16:15:46 +0100 Subject: [PATCH 0207/4335] random: defer fast pool mixing to worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 58340f8e952b613e0ead0bed58b97b05bf4743c5 upstream. On PREEMPT_RT, it's problematic to take spinlocks from hard irq handlers. We can fix this by deferring to a workqueue the dumping of the fast pool into the input pool. We accomplish this with some careful rules on fast_pool->count: - When it's incremented to >= 64, we schedule the work. - If the top bit is set, we never schedule the work, even if >= 64. - The worker is responsible for setting it back to 0 when it's done. There are two small issues around using workqueues for this purpose that we work around. The first issue is that mix_interrupt_randomness() might be migrated to another CPU during CPU hotplug. This issue is rectified by checking that it hasn't been migrated (after disabling irqs). If it has been migrated, then we set the count to zero, so that when the CPU comes online again, it can requeue the work. As part of this, we switch to using an atomic_t, so that the increment in the irq handler doesn't wipe out the zeroing if the CPU comes back online while this worker is running. The second issue is that, though relatively minor in effect, we probably want to make sure we get a consistent view of the pool onto the stack, in case it's interrupted by an irq while reading. To do this, we don't reenable irqs until after the copy. There are only 18 instructions between the cli and sti, so this is a pretty tiny window. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Theodore Ts'o Cc: Jonathan Neuschäfer Acked-by: Sebastian Andrzej Siewior Reviewed-by: Sultan Alsawaf Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 63 +++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 2b462d9a15637..8728de2f44508 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1178,9 +1178,10 @@ struct fast_pool { u32 pool32[4]; u64 pool64[2]; }; + struct work_struct mix; unsigned long last; + atomic_t count; u16 reg_idx; - u8 count; }; /* @@ -1230,12 +1231,49 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *ptr; } +static void mix_interrupt_randomness(struct work_struct *work) +{ + struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); + u32 pool[4]; + + /* Check to see if we're running on the wrong CPU due to hotplug. */ + local_irq_disable(); + if (fast_pool != this_cpu_ptr(&irq_randomness)) { + local_irq_enable(); + /* + * If we are unlucky enough to have been moved to another CPU, + * during CPU hotplug while the CPU was shutdown then we set + * our count to zero atomically so that when the CPU comes + * back online, it can enqueue work again. The _release here + * pairs with the atomic_inc_return_acquire in + * add_interrupt_randomness(). + */ + atomic_set_release(&fast_pool->count, 0); + return; + } + + /* + * Copy the pool to the stack so that the mixer always has a + * consistent view, before we reenable irqs again. + */ + memcpy(pool, fast_pool->pool32, sizeof(pool)); + atomic_set(&fast_pool->count, 0); + fast_pool->last = jiffies; + local_irq_enable(); + + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); + memzero_explicit(pool, sizeof(pool)); +} + void add_interrupt_randomness(int irq) { + enum { MIX_INFLIGHT = 1U << 31 }; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); + unsigned int new_count; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1255,12 +1293,13 @@ void add_interrupt_randomness(int irq) } fast_mix(fast_pool->pool32); - ++fast_pool->count; + /* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). */ + new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count); if (unlikely(crng_init == 0)) { - if (fast_pool->count >= 64 && + if (new_count >= 64 && crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { - fast_pool->count = 0; + atomic_set(&fast_pool->count, 0); fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); @@ -1270,20 +1309,16 @@ void add_interrupt_randomness(int irq) return; } - if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) + if (new_count & MIX_INFLIGHT) return; - if (!spin_trylock(&input_pool.lock)) + if (new_count < 64 && !time_after(now, fast_pool->last + HZ)) return; - fast_pool->last = now; - _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); - spin_unlock(&input_pool.lock); - - fast_pool->count = 0; - - /* Award one bit for the contents of the fast pool. */ - credit_entropy_bits(1); + if (unlikely(!fast_pool->mix.func)) + INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); + atomic_or(MIX_INFLIGHT, &fast_pool->count); + queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); -- GitLab From d386b087056a354a3cd59e40e70e06e621d8a58e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 01:26:17 +0100 Subject: [PATCH 0208/4335] random: do not take pool spinlock at boot commit afba0b80b977b2a8f16234f2acd982f82710ba33 upstream. Since rand_initialize() is run while interrupts are still off and nothing else is running, we don't need to repeatedly take and release the pool spinlock, especially in the RDSEED loop. Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8728de2f44508..1d0249e017d0c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -978,10 +978,10 @@ int __init rand_initialize(void) rv = random_get_entropy(); arch_init = false; } - mix_pool_bytes(&rv, sizeof(rv)); + _mix_pool_bytes(&rv, sizeof(rv)); } - mix_pool_bytes(&now, sizeof(now)); - mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(utsname(), sizeof(*(utsname()))); extract_entropy(base_crng.key, sizeof(base_crng.key)); ++base_crng.generation; -- GitLab From 9df4a56fad32c31049bebc9800b4ea50e1288093 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 23:54:09 +0100 Subject: [PATCH 0209/4335] random: unify early init crng load accounting commit da792c6d5f59a76c10a310c5d4c93428fd18f996 upstream. crng_fast_load() and crng_slow_load() have different semantics: - crng_fast_load() xors and accounts with crng_init_cnt. - crng_slow_load() hashes and doesn't account. However add_hwgenerator_randomness() can afford to hash (it's called from a kthread), and it should account. Additionally, ones that can afford to hash don't need to take a trylock but can take a normal lock. So, we combine these into one function, crng_pre_init_inject(), which allows us to control these in a uniform way. This will make it simpler later to simplify this all down when the time comes for that. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 112 ++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1d0249e017d0c..fdfb0dec6ae6c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -386,7 +386,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, because - * this is what crng_{fast,slow}_load mutate during early init. + * this is what crng_pre_init_inject() mutates during early init. */ if (unlikely(!crng_ready())) { bool ready; @@ -437,72 +437,75 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], } /* - * This function is for crng_init == 0 only. - * - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. Returns the number of - * bytes processed from cp. + * This function is for crng_init == 0 only. It loads entropy directly + * into the crng's key, without going through the input pool. It is, + * generally speaking, not very safe, but we use this only at early + * boot time when it's better to have something there rather than + * nothing. + * + * There are two paths, a slow one and a fast one. The slow one + * hashes the input along with the current key. The fast one simply + * xors it in, and should only be used from interrupt context. + * + * If account is set, then the crng_init_cnt counter is incremented. + * This shouldn't be set by functions like add_device_randomness(), + * where we can't trust the buffer passed to it is guaranteed to be + * unpredictable (so it might not have any entropy at all). + * + * Returns the number of bytes processed from input, which is bounded + * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_fast_load(const void *cp, size_t len) +static size_t crng_pre_init_inject(const void *input, size_t len, + bool fast, bool account) { static int crng_init_cnt = 0; unsigned long flags; - const u8 *src = (const u8 *)cp; - size_t ret = 0; - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; + if (fast) { + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return 0; + } else { + spin_lock_irqsave(&base_crng.lock, flags); + } + if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; - src++; crng_init_cnt++; len--; ret++; - } - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - spin_unlock_irqrestore(&base_crng.lock, flags); - if (crng_init == 1) - pr_notice("fast init done\n"); - return ret; -} -/* - * This function is for crng_init == 0 only. - * - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So, we simply hash the contents in with the current key. Finally, - * we do *not* advance crng_init_cnt since buffer we may get may be - * something like a fixed DMI table (for example), which might very - * well be unique to the machine, but is otherwise unvarying. - */ -static void crng_slow_load(const void *cp, size_t len) -{ - unsigned long flags; - struct blake2s_state hash; + if (account) + len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - blake2s_init(&hash, sizeof(base_crng.key)); + if (fast) { + const u8 *src = input; + size_t i; - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; + for (i = 0; i < len; ++i) + base_crng.key[(crng_init_cnt + i) % + sizeof(base_crng.key)] ^= src[i]; + } else { + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, input, len); + blake2s_final(&hash, base_crng.key); } - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, cp, len); - blake2s_final(&hash, base_crng.key); + if (account) { + crng_init_cnt += len; + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + ++base_crng.generation; + crng_init = 1; + } + } spin_unlock_irqrestore(&base_crng.lock, flags); + + if (crng_init == 1) + pr_notice("fast init done\n"); + + return len; } static void _get_random_bytes(void *buf, size_t nbytes) @@ -1018,7 +1021,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long flags; if (!crng_ready() && size) - crng_slow_load(buf, size); + crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, size); @@ -1135,7 +1138,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); + size_t ret = crng_pre_init_inject(buffer, count, false, true); mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; @@ -1298,7 +1301,8 @@ void add_interrupt_randomness(int irq) if (unlikely(crng_init == 0)) { if (new_count >= 64 && - crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { + crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), + true, true) > 0) { atomic_set(&fast_pool->count, 0); fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { -- GitLab From 98d6def672fd10d546129cb07dc7268c5f634079 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 23:57:38 +0100 Subject: [PATCH 0210/4335] random: check for crng_init == 0 in add_device_randomness() commit 1daf2f387652bf3a7044aea042f5023b3f6b189b upstream. This has no real functional change, as crng_pre_init_inject() (and before that, crng_slow_init()) always checks for == 0, not >= 2. So correct the outer unlocked change to reflect that. Before this used crng_ready(), which was not correct. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index fdfb0dec6ae6c..adf42f7d7413c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1020,7 +1020,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; - if (!crng_ready() && size) + if (crng_init == 0 && size) crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); -- GitLab From 80c1f7f8f3cd58114fedb190f744f485e54541c9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 16:17:01 +0100 Subject: [PATCH 0211/4335] random: pull add_hwgenerator_randomness() declaration into random.h commit b777c38239fec5a528e59f55b379e31b1a187524 upstream. add_hwgenerator_randomness() is a function implemented and documented inside of random.c. It is the way that hardware RNGs push data into it. Therefore, it should be declared in random.h. Otherwise sparse complains with: random.c:1137:6: warning: symbol 'add_hwgenerator_randomness' was not declared. Should it be static? The alternative would be to include hw_random.h into random.c, but that wouldn't really be good for anything except slowing down compile time. Cc: Matt Mackall Cc: Theodore Ts'o Acked-by: Herbert Xu Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/core.c | 1 + include/linux/hw_random.h | 2 -- include/linux/random.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3db27916256d..cfb085de876b7 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 1a9fc38f8938c..aa1d4da03538b 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index 37e1e8c43d7ee..d7354de9351e9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -32,6 +32,8 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -- GitLab From 144c1e7ecf00688ae8c7f9a8483820527505a9d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 22:48:04 +0100 Subject: [PATCH 0212/4335] random: clear fast pool, crng, and batches in cpuhp bring up commit 3191dd5a1179ef0fad5a050a1702ae98b6251e8f upstream. For the irq randomness fast pool, rather than having to use expensive atomics, which were visibly the most expensive thing in the entire irq handler, simply take care of the extreme edge case of resetting count to zero in the cpuhp online handler, just after workqueues have been reenabled. This simplifies the code a bit and lets us use vanilla variables rather than atomics, and performance should be improved. As well, very early on when the CPU comes up, while interrupts are still disabled, we clear out the per-cpu crng and its batches, so that it always starts with fresh randomness. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Theodore Ts'o Cc: Sultan Alsawaf Cc: Dominik Brodowski Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 62 +++++++++++++++++++++++++++++--------- include/linux/cpuhotplug.h | 2 ++ include/linux/random.h | 5 +++ kernel/cpu.c | 11 +++++++ 4 files changed, 65 insertions(+), 15 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index adf42f7d7413c..6c36940e2c6ec 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -698,6 +698,25 @@ u32 get_random_u32(void) } EXPORT_SYMBOL(get_random_u32); +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int random_prepare_cpu(unsigned int cpu) +{ + /* + * When the cpu comes back online, immediately invalidate both + * the per-cpu crng and all batches, so that we serve fresh + * randomness. + */ + per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; + per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; + return 0; +} +#endif + /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. @@ -1183,7 +1202,7 @@ struct fast_pool { }; struct work_struct mix; unsigned long last; - atomic_t count; + unsigned int count; u16 reg_idx; }; @@ -1219,6 +1238,29 @@ static void fast_mix(u32 pool[4]) static DEFINE_PER_CPU(struct fast_pool, irq_randomness); +#ifdef CONFIG_SMP +/* + * This function is called when the CPU has just come online, with + * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. + */ +int random_online_cpu(unsigned int cpu) +{ + /* + * During CPU shutdown and before CPU onlining, add_interrupt_ + * randomness() may schedule mix_interrupt_randomness(), and + * set the MIX_INFLIGHT flag. However, because the worker can + * be scheduled on a different CPU during this period, that + * flag will never be cleared. For that reason, we zero out + * the flag here, which runs just after workqueues are onlined + * for the CPU again. This also has the effect of setting the + * irq randomness count to zero so that new accumulated irqs + * are fresh. + */ + per_cpu_ptr(&irq_randomness, cpu)->count = 0; + return 0; +} +#endif + static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { u32 *ptr = (u32 *)regs; @@ -1243,15 +1285,6 @@ static void mix_interrupt_randomness(struct work_struct *work) local_irq_disable(); if (fast_pool != this_cpu_ptr(&irq_randomness)) { local_irq_enable(); - /* - * If we are unlucky enough to have been moved to another CPU, - * during CPU hotplug while the CPU was shutdown then we set - * our count to zero atomically so that when the CPU comes - * back online, it can enqueue work again. The _release here - * pairs with the atomic_inc_return_acquire in - * add_interrupt_randomness(). - */ - atomic_set_release(&fast_pool->count, 0); return; } @@ -1260,7 +1293,7 @@ static void mix_interrupt_randomness(struct work_struct *work) * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool32, sizeof(pool)); - atomic_set(&fast_pool->count, 0); + fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); @@ -1296,14 +1329,13 @@ void add_interrupt_randomness(int irq) } fast_mix(fast_pool->pool32); - /* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). */ - new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count); + new_count = ++fast_pool->count; if (unlikely(crng_init == 0)) { if (new_count >= 64 && crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), true, true) > 0) { - atomic_set(&fast_pool->count, 0); + fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); @@ -1321,7 +1353,7 @@ void add_interrupt_randomness(int irq) if (unlikely(!fast_pool->mix.func)) INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); - atomic_or(MIX_INFLIGHT, &fast_pool->count); + fast_pool->count |= MIX_INFLIGHT; queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 991911048857a..c88ccc48877d6 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -99,6 +99,7 @@ enum cpuhp_state { CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -238,6 +239,7 @@ enum cpuhp_state { CPUHP_AP_PERF_CSKY_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/random.h b/include/linux/random.h index d7354de9351e9..6148b8d1ccf34 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -156,4 +156,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +extern int random_prepare_cpu(unsigned int cpu); +extern int random_online_cpu(unsigned int cpu); +#endif + #endif /* _LINUX_RANDOM_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 5601216eb51bd..da871eb075662 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_RANDOM_PREPARE] = { + .name = "random:prepare", + .startup.single = random_prepare_cpu, + .teardown.single = NULL, + }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, @@ -1782,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, + [CPUHP_AP_RANDOM_ONLINE] = { + .name = "random:online", + .startup.single = random_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, -- GitLab From e400ba11a241386dc27e57c35c00f61dce92e2f4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 22 Feb 2022 13:46:10 +0100 Subject: [PATCH 0213/4335] random: round-robin registers as ulong, not u32 commit da3951ebdcd1cb1d5c750e08cd05aee7b0c04d9a upstream. When the interrupt handler does not have a valid cycle counter, it calls get_reg() to read a register from the irq stack, in round-robin. Currently it does this assuming that registers are 32-bit. This is _probably_ the case, and probably all platforms without cycle counters are in fact 32-bit platforms. But maybe not, and either way, it's not quite correct. This commit fixes that to deal with `unsigned long` rather than `u32`. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6c36940e2c6ec..35991d87d6199 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1261,15 +1261,15 @@ int random_online_cpu(unsigned int cpu) } #endif -static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) +static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) { - u32 *ptr = (u32 *)regs; + unsigned long *ptr = (unsigned long *)regs; unsigned int idx; if (regs == NULL) return 0; idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(u32)) + if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) idx = 0; ptr += idx++; WRITE_ONCE(f->reg_idx, idx); -- GitLab From c169e7a09cd48a3f2ae2978e6713fd72f09f284c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 22 Feb 2022 14:01:57 +0100 Subject: [PATCH 0214/4335] random: only wake up writers after zap if threshold was passed commit a3f9e8910e1584d7725ef7d5ac870920d42d0bb4 upstream. The only time that we need to wake up /dev/random writers on RNDCLEARPOOL/RNDZAPPOOL is when we're changing from a value that is greater than or equal to POOL_MIN_BITS to zero, because if we're changing from below POOL_MIN_BITS to zero, the writers are already unblocked. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 35991d87d6199..6a5da2fe2305f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1582,7 +1582,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0)) { + if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } -- GitLab From fdb1e54472c3ead0d82a33bda78f3376736db5f1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 24 Feb 2022 23:04:56 +0100 Subject: [PATCH 0215/4335] random: cleanup UUID handling commit 64276a9939ff414f2f0db38036cf4e1a0a703394 upstream. Rather than hard coding various lengths, we can use the right constants. Strings should be `char *` while buffers should be `u8 *`. Rather than have a nonsensical and unused maxlength, just remove it. Finally, use snprintf instead of sprintf, just out of good hygiene. As well, remove the old comment about returning a binary UUID via the binary sysctl syscall. That syscall was removed from the kernel in 5.5, and actually, the "uuid_strategy" function and related infrastructure for even serving it via the binary sysctl syscall was removed with 894d2491153a ("sysctl drivers: Remove dead binary sysctl support") back in 2.6.33. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6a5da2fe2305f..9785f78f49ce8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1661,22 +1661,25 @@ const struct file_operations urandom_fops = { static int sysctl_random_min_urandom_seed = 60; static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; static int sysctl_poolsize = POOL_BITS; -static char sysctl_bootid[16]; +static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random - * UUID. The difference is in whether table->data is NULL; if it is, + * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. - * - * If the user accesses this via the proc interface, the UUID will be - * returned as an ASCII string in the standard UUID format; if via the - * sysctl system call, as 16 bytes of binary data. */ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct ctl_table fake_table; - unsigned char buf[64], tmp_uuid[16], *uuid; + u8 tmp_uuid[UUID_SIZE], *uuid; + char uuid_string[UUID_STRING_LEN + 1]; + struct ctl_table fake_table = { + .data = uuid_string, + .maxlen = UUID_STRING_LEN + }; + + if (write) + return -EPERM; uuid = table->data; if (!uuid) { @@ -1691,12 +1694,8 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, spin_unlock(&bootid_spinlock); } - sprintf(buf, "%pU", uuid); - - fake_table.data = buf; - fake_table.maxlen = sizeof(buf); - - return proc_dostring(&fake_table, write, buffer, lenp, ppos); + snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); + return proc_dostring(&fake_table, 0, buffer, lenp, ppos); } extern struct ctl_table random_table[]; @@ -1732,13 +1731,11 @@ struct ctl_table random_table[] = { { .procname = "boot_id", .data = &sysctl_bootid, - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, -- GitLab From 1523ec5324e9fef549615b008c2a4a3e7c1d0711 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 24 Feb 2022 18:30:58 +0100 Subject: [PATCH 0216/4335] random: unify cycles_t and jiffies usage and types commit abded93ec1e9692920fe309f07f40bd1035f2940 upstream. random_get_entropy() returns a cycles_t, not an unsigned long, which is sometimes 64 bits on various 32-bit platforms, including x86. Conversely, jiffies is always unsigned long. This commit fixes things to use cycles_t for fields that use random_get_entropy(), named "cycles", and unsigned long for fields that use jiffies, named "now". It's also good to mix in a cycles_t and a jiffies in the same way for both add_device_randomness and add_timer_randomness, rather than using xor in one case. Finally, we unify the order of these volatile reads, always reading the more precise cycles counter, and then jiffies, so that the cycle counter is as close to the event as possible. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9785f78f49ce8..27d9edfce3e0e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1020,12 +1020,6 @@ int __init rand_initialize(void) return 0; } -/* There is one of these per entropy source */ -struct timer_rand_state { - cycles_t last_time; - long last_delta, last_delta2; -}; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1036,19 +1030,26 @@ struct timer_rand_state { */ void add_device_randomness(const void *buf, size_t size) { - unsigned long time = random_get_entropy() ^ jiffies; - unsigned long flags; + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; if (crng_init == 0 && size) crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(buf, size); - _mix_pool_bytes(&time, sizeof(time)); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; + /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate @@ -1057,29 +1058,26 @@ EXPORT_SYMBOL(add_device_randomness); * The number "num" is also added to the pool - it should somehow describe * the type of event which just happened. This is currently 0-255 for * keyboard scan codes, and 256 upwards for interrupts. - * */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - struct { - long jiffies; - unsigned int cycles; - unsigned int num; - } sample; + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; long delta, delta2, delta3; - sample.jiffies = jiffies; - sample.cycles = random_get_entropy(); - sample.num = num; - mix_pool_bytes(&sample, sizeof(sample)); + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ - delta = sample.jiffies - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, sample.jiffies); + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); @@ -1305,10 +1303,10 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; + cycles_t cycles = random_get_entropy(); + unsigned long now = jiffies; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); unsigned int new_count; if (cycles == 0) @@ -1383,28 +1381,28 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - unsigned long now; + cycles_t cycles; struct timer_list timer; } stack; - stack.now = random_get_entropy(); + stack.cycles = random_get_entropy(); /* Slow counter - or none. Don't even bother */ - if (stack.now == random_get_entropy()) + if (stack.cycles == random_get_entropy()) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready()) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies + 1); - mix_pool_bytes(&stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); schedule(); - stack.now = random_get_entropy(); + stack.cycles = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); } -- GitLab From 2c60d7f38d640e5d01553218eaf4163a1089e6e2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 18:25:07 +0100 Subject: [PATCH 0217/4335] random: do crng pre-init loading in worker rather than irq commit c2a7de4feb6e09f23af7accc0f882a8fa92e7ae5 upstream. Taking spinlocks from IRQ context is generally problematic for PREEMPT_RT. That is, in part, why we take trylocks instead. However, a spin_try_lock() is also problematic since another spin_lock() invocation can potentially PI-boost the wrong task, as the spin_try_lock() is invoked from an IRQ-context, so the task on CPU (random task or idle) is not the actual owner. Additionally, by deferring the crng pre-init loading to the worker, we can use the cryptographic hash function rather than xor, which is perhaps a meaningful difference when considering this data has only been through the relatively weak fast_mix() function. The biggest downside of this approach is that the pre-init loading is now deferred until later, which means things that need random numbers after interrupts are enabled, but before workqueues are running -- or before this particular worker manages to run -- are going to get into trouble. Hopefully in the real world, this window is rather small, especially since this code won't run until 64 interrupts had occurred. Cc: Sultan Alsawaf Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Eric Biggers Cc: Theodore Ts'o Acked-by: Sebastian Andrzej Siewior Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 65 +++++++++++++------------------------------ 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 27d9edfce3e0e..140514f36f739 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -443,10 +443,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * boot time when it's better to have something there rather than * nothing. * - * There are two paths, a slow one and a fast one. The slow one - * hashes the input along with the current key. The fast one simply - * xors it in, and should only be used from interrupt context. - * * If account is set, then the crng_init_cnt counter is incremented. * This shouldn't be set by functions like add_device_randomness(), * where we can't trust the buffer passed to it is guaranteed to be @@ -455,19 +451,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * Returns the number of bytes processed from input, which is bounded * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_pre_init_inject(const void *input, size_t len, - bool fast, bool account) +static size_t crng_pre_init_inject(const void *input, size_t len, bool account) { static int crng_init_cnt = 0; + struct blake2s_state hash; unsigned long flags; - if (fast) { - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; - } else { - spin_lock_irqsave(&base_crng.lock, flags); - } + blake2s_init(&hash, sizeof(base_crng.key)); + spin_lock_irqsave(&base_crng.lock, flags); if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); return 0; @@ -476,21 +468,9 @@ static size_t crng_pre_init_inject(const void *input, size_t len, if (account) len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - if (fast) { - const u8 *src = input; - size_t i; - - for (i = 0; i < len; ++i) - base_crng.key[(crng_init_cnt + i) % - sizeof(base_crng.key)] ^= src[i]; - } else { - struct blake2s_state hash; - - blake2s_init(&hash, sizeof(base_crng.key)); - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, input, len); - blake2s_final(&hash, base_crng.key); - } + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, input, len); + blake2s_final(&hash, base_crng.key); if (account) { crng_init_cnt += len; @@ -1034,7 +1014,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long flags, now = jiffies; if (crng_init == 0 && size) - crng_pre_init_inject(buf, size, false, false); + crng_pre_init_inject(buf, size, false); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&cycles, sizeof(cycles)); @@ -1155,7 +1135,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { - size_t ret = crng_pre_init_inject(buffer, count, false, true); + size_t ret = crng_pre_init_inject(buffer, count, true); mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; @@ -1295,8 +1275,14 @@ static void mix_interrupt_randomness(struct work_struct *work) fast_pool->last = jiffies; local_irq_enable(); - mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); + if (unlikely(crng_init == 0)) { + crng_pre_init_inject(pool, sizeof(pool), true); + mix_pool_bytes(pool, sizeof(pool)); + } else { + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); + } + memzero_explicit(pool, sizeof(pool)); } @@ -1329,24 +1315,11 @@ void add_interrupt_randomness(int irq) fast_mix(fast_pool->pool32); new_count = ++fast_pool->count; - if (unlikely(crng_init == 0)) { - if (new_count >= 64 && - crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), - true, true) > 0) { - fast_pool->count = 0; - fast_pool->last = now; - if (spin_trylock(&input_pool.lock)) { - _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); - spin_unlock(&input_pool.lock); - } - } - return; - } - if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && !time_after(now, fast_pool->last + HZ)) + if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || + unlikely(crng_init == 0))) return; if (unlikely(!fast_pool->mix.func)) -- GitLab From e4e1600a674fc5b4fabaa578d8854db96fcc28dd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 28 Feb 2022 13:57:57 +0100 Subject: [PATCH 0218/4335] random: give sysctl_random_min_urandom_seed a more sensible value commit d0efdf35a6a71d307a250199af6fce122a7c7e11 upstream. This isn't used by anything or anywhere, but we can't delete it due to compatibility. So at least give it the correct value of what it's supposed to be instead of a garbage one. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 140514f36f739..389c58e11af52 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1619,7 +1619,7 @@ const struct file_operations urandom_fops = { * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * - * - urandom_min_reseed_secs - fixed to the meaningless value "60". + * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. * It is writable to avoid breaking old userspaces, but writing * to it does not change any behavior of the RNG. * @@ -1629,7 +1629,7 @@ const struct file_operations urandom_fops = { #include -static int sysctl_random_min_urandom_seed = 60; +static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; -- GitLab From 39c9e5566ac590ad2c9e488baaf6058c251b8fb5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 28 Feb 2022 14:00:52 +0100 Subject: [PATCH 0219/4335] random: don't let 644 read-only sysctls be written to commit 77553cf8f44863b31da242cf24671d76ddb61597 upstream. We leave around these old sysctls for compatibility, and we keep them "writable" for compatibility, but even after writing, we should keep reporting the same value. This is consistent with how userspaces tend to use sysctl_random_write_wakeup_bits, writing to it, and then later reading from it and using the value. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 389c58e11af52..ee430c1753a97 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1669,6 +1669,13 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, return proc_dostring(&fake_table, 0, buffer, lenp, ppos); } +/* The same as proc_dointvec, but writes don't change anything. */ +static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos); +} + extern struct ctl_table random_table[]; struct ctl_table random_table[] = { { @@ -1690,14 +1697,14 @@ struct ctl_table random_table[] = { .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_do_rointvec, }, { .procname = "urandom_min_reseed_secs", .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_do_rointvec, }, { .procname = "boot_id", -- GitLab From 6da877d2d46ba59a65352061cafbdac790cb8d68 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 1 Mar 2022 20:03:49 +0100 Subject: [PATCH 0220/4335] random: replace custom notifier chain with standard one commit 5acd35487dc911541672b3ffc322851769c32a56 upstream. We previously rolled our own randomness readiness notifier, which only has two users in the whole kernel. Replace this with a more standard atomic notifier block that serves the same purpose with less code. Also unexport the symbols, because no modules use it, only unconditional builtins. The only drawback is that it's possible for a notification handler returning the "stop" code to prevent further processing, but given that there are only two users, and that we're unexporting this anyway, that doesn't seem like a significant drawback for the simplification we receive here. Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski [Jason: for stable, also backported to crypto/drbg.c, not unexporting.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/drbg.c | 17 +++++------ drivers/char/random.c | 69 +++++++++++++----------------------------- include/crypto/drbg.h | 2 +- include/linux/random.h | 10 ++---- lib/random32.c | 13 +++++--- lib/vsprintf.c | 10 +++--- 6 files changed, 47 insertions(+), 74 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index ea85d4a0fe9e9..03c9ef768c227 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1491,12 +1491,13 @@ static int drbg_generate_long(struct drbg_state *drbg, return 0; } -static void drbg_schedule_async_seed(struct random_ready_callback *rdy) +static int drbg_schedule_async_seed(struct notifier_block *nb, unsigned long action, void *data) { - struct drbg_state *drbg = container_of(rdy, struct drbg_state, + struct drbg_state *drbg = container_of(nb, struct drbg_state, random_ready); schedule_work(&drbg->seed_work); + return 0; } static int drbg_prepare_hrng(struct drbg_state *drbg) @@ -1511,10 +1512,8 @@ static int drbg_prepare_hrng(struct drbg_state *drbg) INIT_WORK(&drbg->seed_work, drbg_async_seed); - drbg->random_ready.owner = THIS_MODULE; - drbg->random_ready.func = drbg_schedule_async_seed; - - err = add_random_ready_callback(&drbg->random_ready); + drbg->random_ready.notifier_call = drbg_schedule_async_seed; + err = register_random_ready_notifier(&drbg->random_ready); switch (err) { case 0: @@ -1525,7 +1524,7 @@ static int drbg_prepare_hrng(struct drbg_state *drbg) fallthrough; default: - drbg->random_ready.func = NULL; + drbg->random_ready.notifier_call = NULL; return err; } @@ -1629,8 +1628,8 @@ free_everything: */ static int drbg_uninstantiate(struct drbg_state *drbg) { - if (drbg->random_ready.func) { - del_random_ready_callback(&drbg->random_ready); + if (drbg->random_ready.notifier_call) { + unregister_random_ready_notifier(&drbg->random_ready); cancel_work_sync(&drbg->seed_work); } diff --git a/drivers/char/random.c b/drivers/char/random.c index ee430c1753a97..1421bd53f5763 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -83,8 +83,8 @@ static int crng_init = 0; /* Various types of waiters for crng_init->2 transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); +static DEFINE_SPINLOCK(random_ready_chain_lock); +static RAW_NOTIFIER_HEAD(random_ready_chain); /* Control how we warn userspace. */ static struct ratelimit_state unseeded_warning = @@ -147,72 +147,45 @@ EXPORT_SYMBOL(wait_for_random_bytes); * * returns: 0 if callback is successfully added * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive */ -int add_random_ready_callback(struct random_ready_callback *rdy) +int register_random_ready_notifier(struct notifier_block *nb) { - struct module *owner; unsigned long flags; - int err = -EALREADY; + int ret = -EALREADY; if (crng_ready()) - return err; - - owner = rdy->owner; - if (!try_module_get(owner)) - return -ENOENT; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (crng_ready()) - goto out; - - owner = NULL; - - list_add(&rdy->list, &random_ready_list); - err = 0; - -out: - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); + return ret; - return err; + spin_lock_irqsave(&random_ready_chain_lock, flags); + if (!crng_ready()) + ret = raw_notifier_chain_register(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; } -EXPORT_SYMBOL(add_random_ready_callback); +EXPORT_SYMBOL(register_random_ready_notifier); /* * Delete a previously registered readiness callback function. */ -void del_random_ready_callback(struct random_ready_callback *rdy) +int unregister_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; - struct module *owner = NULL; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (!list_empty(&rdy->list)) { - list_del_init(&rdy->list); - owner = rdy->owner; - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); + int ret; - module_put(owner); + spin_lock_irqsave(&random_ready_chain_lock, flags); + ret = raw_notifier_chain_unregister(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; } -EXPORT_SYMBOL(del_random_ready_callback); +EXPORT_SYMBOL(unregister_random_ready_notifier); static void process_random_ready_list(void) { unsigned long flags; - struct random_ready_callback *rdy, *tmp; - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; - - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); + spin_lock_irqsave(&random_ready_chain_lock, flags); + raw_notifier_call_chain(&random_ready_chain, 0, NULL); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); } #define warn_unseeded_randomness(previous) \ diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index c4165126937e4..88e4d145f7cda 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -136,7 +136,7 @@ struct drbg_state { const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; - struct random_ready_callback random_ready; + struct notifier_block random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) diff --git a/include/linux/random.h b/include/linux/random.h index 6148b8d1ccf34..d4abda9e23487 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -10,11 +10,7 @@ #include -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; extern void add_device_randomness(const void *, size_t); extern void add_bootloader_randomness(const void *, size_t); @@ -39,8 +35,8 @@ extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); +extern int register_random_ready_notifier(struct notifier_block *nb); +extern int unregister_random_ready_notifier(struct notifier_block *nb); extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE diff --git a/lib/random32.c b/lib/random32.c index 3c19820796d04..f0ab17c2244be 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -40,6 +40,7 @@ #include #include #include +#include #include /** @@ -551,9 +552,11 @@ static void prandom_reseed(struct timer_list *unused) * To avoid worrying about whether it's safe to delay that interrupt * long enough to seed all CPUs, just schedule an immediate timer event. */ -static void prandom_timer_start(struct random_ready_callback *unused) +static int prandom_timer_start(struct notifier_block *nb, + unsigned long action, void *data) { mod_timer(&seed_timer, jiffies); + return 0; } #ifdef CONFIG_RANDOM32_SELFTEST @@ -617,13 +620,13 @@ core_initcall(prandom32_state_selftest); */ static int __init prandom_init_late(void) { - static struct random_ready_callback random_ready = { - .func = prandom_timer_start + static struct notifier_block random_ready = { + .notifier_call = prandom_timer_start }; - int ret = add_random_ready_callback(&random_ready); + int ret = register_random_ready_notifier(&random_ready); if (ret == -EALREADY) { - prandom_timer_start(&random_ready); + prandom_timer_start(&random_ready, 0, NULL); ret = 0; } return ret; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a3b73f28b7a9c..a60f0bb2ea902 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -761,14 +761,16 @@ static void enable_ptr_key_workfn(struct work_struct *work) static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); -static void fill_random_ptr_key(struct random_ready_callback *unused) +static int fill_random_ptr_key(struct notifier_block *nb, + unsigned long action, void *data) { /* This may be in an interrupt handler. */ queue_work(system_unbound_wq, &enable_ptr_key_work); + return 0; } -static struct random_ready_callback random_ready = { - .func = fill_random_ptr_key +static struct notifier_block random_ready = { + .notifier_call = fill_random_ptr_key }; static int __init initialize_ptr_random(void) @@ -782,7 +784,7 @@ static int __init initialize_ptr_random(void) return 0; } - ret = add_random_ready_callback(&random_ready); + ret = register_random_ready_notifier(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) { -- GitLab From 0c66c876393b06597843900310423a298fb2e03c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 14:58:44 +0100 Subject: [PATCH 0221/4335] random: use SipHash as interrupt entropy accumulator commit f5eab0e2db4f881fb2b62b3fdad5b9be673dd7ae upstream. The current fast_mix() function is a piece of classic mailing list crypto, where it just sort of sprung up by an anonymous author without a lot of real analysis of what precisely it was accomplishing. As an ARX permutation alone, there are some easily searchable differential trails in it, and as a means of preventing malicious interrupts, it completely fails, since it xors new data into the entire state every time. It can't really be analyzed as a random permutation, because it clearly isn't, and it can't be analyzed as an interesting linear algebraic structure either, because it's also not that. There really is very little one can say about it in terms of entropy accumulation. It might diffuse bits, some of the time, maybe, we hope, I guess. But for the most part, it fails to accomplish anything concrete. As a reminder, the simple goal of add_interrupt_randomness() is to simply accumulate entropy until ~64 interrupts have elapsed, and then dump it into the main input pool, which uses a cryptographic hash. It would be nice to have something cryptographically strong in the interrupt handler itself, in case a malicious interrupt compromises a per-cpu fast pool within the 64 interrupts / 1 second window, and then inside of that same window somehow can control its return address and cycle counter, even if that's a bit far fetched. However, with a very CPU-limited budget, actually doing that remains an active research project (and perhaps there'll be something useful for Linux to come out of it). And while the abundance of caution would be nice, this isn't *currently* the security model, and we don't yet have a fast enough solution to make it our security model. Plus there's not exactly a pressing need to do that. (And for the avoidance of doubt, the actual cluster of 64 accumulated interrupts still gets dumped into our cryptographically secure input pool.) So, for now we are going to stick with the existing interrupt security model, which assumes that each cluster of 64 interrupt data samples is mostly non-malicious and not colluding with an infoleaker. With this as our goal, we have a few more choices, simply aiming to accumulate entropy, while discarding the least amount of it. We know from that random oracles, instantiated as computational hash functions, make good entropy accumulators and extractors, which is the justification for using BLAKE2s in the main input pool. As mentioned, we don't have that luxury here, but we also don't have the same security model requirements, because we're assuming that there aren't malicious inputs. A pseudorandom function instance can approximately behave like a random oracle, provided that the key is uniformly random. But since we're not concerned with malicious inputs, we can pick a fixed key, which is not secret, knowing that "nature" won't interact with a sufficiently chosen fixed key by accident. So we pick a PRF with a fixed initial key, and accumulate into it continuously, dumping the result every 64 interrupts into our cryptographically secure input pool. For this, we make use of SipHash-1-x on 64-bit and HalfSipHash-1-x on 32-bit, which are already in use in the kernel's hsiphash family of functions and achieve the same performance as the function they replace. It would be nice to do two rounds, but we don't exactly have the CPU budget handy for that, and one round alone is already sufficient. As mentioned, we start with a fixed initial key (zeros is fine), and allow SipHash's symmetry breaking constants to turn that into a useful starting point. Also, since we're dumping the result (or half of it on 64-bit so as to tax our hash function the same amount on all platforms) into the cryptographically secure input pool, there's no point in finalizing SipHash's output, since it'll wind up being finalized by something much stronger. This means that all we need to do is use the ordinary round function word-by-word, as normal SipHash does. Simplified, the flow is as follows: Initialize: siphash_state_t state; siphash_init(&state, key={0, 0, 0, 0}); Update (accumulate) on interrupt: siphash_update(&state, interrupt_data_and_timing); Dump into input pool after 64 interrupts: blake2s_update(&input_pool, &state, sizeof(state) / 2); The result of all of this is that the security model is unchanged from before -- we assume non-malicious inputs -- yet we now implement that model with a stronger argument. I would like to emphasize, again, that the purpose of this commit is to improve the existing design, by making it analyzable, without changing any fundamental assumptions. There may well be value down the road in changing up the existing design, using something cryptographically strong, or simply using a ring buffer of samples rather than having a fast_mix() at all, or changing which and how much data we collect each interrupt so that we can use something linear, or a variety of other ideas. This commit does not invalidate the potential for those in the future. For example, in the future, if we're able to characterize the data we're collecting on each interrupt, we may be able to inch toward information theoretic accumulators. shows that `s = ror32(s, 7) ^ x` and `s = ror64(s, 19) ^ x` make very good accumulators for 2-monotone distributions, which would apply to timestamp counters, like random_get_entropy() or jiffies, but would not apply to our current combination of the two values, or to the various function addresses and register values we mix in. Alternatively, shows that max-period linear functions with no non-trivial invariant subspace make good extractors, used in the form `s = f(s) ^ x`. However, this only works if the input data is both identical and independent, and obviously a collection of address values and counters fails; so it goes with theoretical papers. Future directions here may involve trying to characterize more precisely what we actually need to collect in the interrupt handler, and building something specific around that. However, as mentioned, the morass of data we're gathering at the interrupt handler presently defies characterization, and so we use SipHash for now, which works well and performs well. Cc: Theodore Ts'o Cc: Greg Kroah-Hartman Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 94 +++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1421bd53f5763..fe2533a1642ac 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1147,48 +1147,51 @@ void add_bootloader_randomness(const void *buf, size_t size) EXPORT_SYMBOL_GPL(add_bootloader_randomness); struct fast_pool { - union { - u32 pool32[4]; - u64 pool64[2]; - }; struct work_struct mix; + unsigned long pool[4]; unsigned long last; unsigned int count; u16 reg_idx; }; +static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { +#ifdef CONFIG_64BIT + /* SipHash constants */ + .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL, + 0x6c7967656e657261UL, 0x7465646279746573UL } +#else + /* HalfSipHash constants */ + .pool = { 0, 0, 0x6c796765U, 0x74656462U } +#endif +}; + /* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. + * This is [Half]SipHash-1-x, starting from an empty key. Because + * the key is fixed, it assumes that its inputs are non-malicious, + * and therefore this has no security on its own. s represents the + * 128 or 256-bit SipHash state, while v represents a 128-bit input. */ -static void fast_mix(u32 pool[4]) +static void fast_mix(unsigned long s[4], const unsigned long *v) { - u32 a = pool[0], b = pool[1]; - u32 c = pool[2], d = pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + size_t i; - pool[0] = a; pool[1] = b; - pool[2] = c; pool[3] = d; + for (i = 0; i < 16 / sizeof(long); ++i) { + s[3] ^= v[i]; +#ifdef CONFIG_64BIT + s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); + s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; + s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; + s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); +#else + s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); + s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; + s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; + s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); +#endif + s[0] ^= v[i]; + } } -static DEFINE_PER_CPU(struct fast_pool, irq_randomness); - #ifdef CONFIG_SMP /* * This function is called when the CPU has just come online, with @@ -1230,7 +1233,15 @@ static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) static void mix_interrupt_randomness(struct work_struct *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); - u32 pool[4]; + /* + * The size of the copied stack pool is explicitly 16 bytes so that we + * tax mix_pool_byte()'s compression function the same amount on all + * platforms. This means on 64-bit we copy half the pool into this, + * while on 32-bit we copy all of it. The entropy is supposed to be + * sufficiently dispersed between bits that in the sponge-like + * half case, on average we don't wind up "losing" some. + */ + u8 pool[16]; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); @@ -1243,7 +1254,7 @@ static void mix_interrupt_randomness(struct work_struct *work) * Copy the pool to the stack so that the mixer always has a * consistent view, before we reenable irqs again. */ - memcpy(pool, fast_pool->pool32, sizeof(pool)); + memcpy(pool, fast_pool->pool, sizeof(pool)); fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); @@ -1267,25 +1278,30 @@ void add_interrupt_randomness(int irq) struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; + union { + u32 u32[4]; + u64 u64[2]; + unsigned long longs[16 / sizeof(long)]; + } irq_data; if (cycles == 0) cycles = get_reg(fast_pool, regs); if (sizeof(cycles) == 8) - fast_pool->pool64[0] ^= cycles ^ rol64(now, 32) ^ irq; + irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; else { - fast_pool->pool32[0] ^= cycles ^ irq; - fast_pool->pool32[1] ^= now; + irq_data.u32[0] = cycles ^ irq; + irq_data.u32[1] = now; } if (sizeof(unsigned long) == 8) - fast_pool->pool64[1] ^= regs ? instruction_pointer(regs) : _RET_IP_; + irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; else { - fast_pool->pool32[2] ^= regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool32[3] ^= get_reg(fast_pool, regs); + irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; + irq_data.u32[3] = get_reg(fast_pool, regs); } - fast_mix(fast_pool->pool32); + fast_mix(fast_pool->pool, irq_data.longs); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) -- GitLab From 99290c6898b22e57ccfb53a34f93abf61b9dad26 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Mar 2022 11:20:17 -0700 Subject: [PATCH 0222/4335] random: make consistent usage of crng_ready() commit a96cfe2d427064325ecbf56df8816c6b871ec285 upstream. Rather than sometimes checking `crng_init < 2`, we should always use the crng_ready() macro, so that should we change anything later, it's consistent. Additionally, that macro already has a likely() around it, which means we don't need to open code our own likely() and unlikely() annotations. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index fe2533a1642ac..90a3a09999d52 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -125,18 +125,13 @@ static void try_to_generate_entropy(void); */ int wait_for_random_bytes(void) { - if (likely(crng_ready())) - return 0; - - do { + while (!crng_ready()) { int ret; ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); if (ret) return ret > 0 ? 0 : ret; - try_to_generate_entropy(); - } while (!crng_ready()); - + } return 0; } EXPORT_SYMBOL(wait_for_random_bytes); @@ -293,7 +288,7 @@ static void crng_reseed(void) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - if (crng_init < 2) { + if (!crng_ready()) { crng_init = 2; finalize_init = true; } @@ -361,7 +356,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * ready, we do fast key erasure with the base_crng directly, because * this is what crng_pre_init_inject() mutates during early init. */ - if (unlikely(!crng_ready())) { + if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); @@ -804,7 +799,7 @@ static void credit_entropy_bits(size_t nbits) entropy_count = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) + if (!crng_ready() && entropy_count >= POOL_MIN_BITS) crng_reseed(); } @@ -961,7 +956,7 @@ int __init rand_initialize(void) extract_entropy(base_crng.key, sizeof(base_crng.key)); ++base_crng.generation; - if (arch_init && trust_cpu && crng_init < 2) { + if (arch_init && trust_cpu && !crng_ready()) { crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } @@ -1550,7 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (crng_init < 2) + if (!crng_ready()) return -ENODATA; crng_reseed(); return 0; -- GitLab From 94c8249efa7651cd9fb3e4581c0c8091cd964455 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Mar 2022 23:32:34 -0700 Subject: [PATCH 0223/4335] random: reseed more often immediately after booting commit 7a7ff644aeaf071d433caffb3b8ea57354b55bd3 upstream. In order to chip away at the "premature first" problem, we augment our existing entropy accounting with more frequent reseedings at boot. The idea is that at boot, we're getting entropy from various places, and we're not very sure which of early boot entropy is good and which isn't. Even when we're crediting the entropy, we're still not totally certain that it's any good. Since boot is the one time (aside from a compromise) that we have zero entropy, it's important that we shepherd entropy into the crng fairly often. At the same time, we don't want a "premature next" problem, whereby an attacker can brute force individual bits of added entropy. In lieu of going full-on Fortuna (for now), we can pick a simpler strategy of just reseeding more often during the first 5 minutes after boot. This is still bounded by the 256-bit entropy credit requirement, so we'll skip a reseeding if we haven't reached that, but in case entropy /is/ coming in, this ensures that it makes its way into the crng rather rapidly during these early stages. Ordinarily we reseed if the previous reseeding is 300 seconds old. This commit changes things so that for the first 600 seconds of boot time, we reseed if the previous reseeding is uptime / 2 seconds old. That means that we'll reseed at the very least double the uptime of the previous reseeding. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 90a3a09999d52..4187cc8e4cb74 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -337,6 +337,28 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], memzero_explicit(first_block, sizeof(first_block)); } +/* + * Return whether the crng seed is considered to be sufficiently + * old that a reseeding might be attempted. This happens if the last + * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at + * an interval proportional to the uptime. + */ +static bool crng_has_old_seed(void) +{ + static bool early_boot = true; + unsigned long interval = CRNG_RESEED_INTERVAL; + + if (unlikely(READ_ONCE(early_boot))) { + time64_t uptime = ktime_get_seconds(); + if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) + WRITE_ONCE(early_boot, false); + else + interval = max_t(unsigned int, 5 * HZ, + (unsigned int)uptime / 2 * HZ); + } + return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); +} + /* * This function returns a ChaCha state that you may use for generating * random data. It also returns up to 32 bytes on its own of random data @@ -370,10 +392,10 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], } /* - * If the base_crng is more than 5 minutes old, we reseed, which - * in turn bumps the generation counter that we check below. + * If the base_crng is old enough, we try to reseed, which in turn + * bumps the generation counter that we check below. */ - if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL))) + if (unlikely(crng_has_old_seed())) crng_reseed(); local_lock_irqsave(&crngs.lock, flags); -- GitLab From 3689ac035180926ee70ceede0133d24af4605fa9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Mar 2022 10:12:16 -0700 Subject: [PATCH 0224/4335] random: check for signal and try earlier when generating entropy commit 3e504d2026eb6c8762cd6040ae57db166516824a upstream. Rather than waiting a full second in an interruptable waiter before trying to generate entropy, try to generate entropy first and wait second. While waiting one second might give an extra second for getting entropy from elsewhere, we're already pretty late in the init process here, and whatever else is generating entropy will still continue to contribute. This has implications on signal handling: we call try_to_generate_entropy() from wait_for_random_bytes(), and wait_for_random_bytes() always uses wait_event_interruptible_timeout() when waiting, since it's called by userspace code in restartable contexts, where signals can pend. Since try_to_generate_entropy() now runs first, if a signal is pending, it's necessary for try_to_generate_entropy() to check for signals, since it won't hit the wait until after try_to_generate_entropy() has returned. And even before this change, when entering a busy loop in try_to_generate_entropy(), we should have been checking to see if any signals are pending, so that a process doesn't get stuck in that loop longer than expected. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4187cc8e4cb74..4c0a12c9052bc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -127,10 +127,11 @@ int wait_for_random_bytes(void) { while (!crng_ready()) { int ret; + + try_to_generate_entropy(); ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); if (ret) return ret > 0 ? 0 : ret; - try_to_generate_entropy(); } return 0; } @@ -1371,7 +1372,7 @@ static void try_to_generate_entropy(void) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); - while (!crng_ready()) { + while (!crng_ready() && !signal_pending(current)) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies + 1); mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); -- GitLab From a704248b4590ce5738360935ea688e7369c4ed6f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 21 Mar 2022 18:48:05 -0600 Subject: [PATCH 0225/4335] random: skip fast_init if hwrng provides large chunk of entropy commit af704c856e888fb044b058d731d61b46eeec499d upstream. At boot time, EFI calls add_bootloader_randomness(), which in turn calls add_hwgenerator_randomness(). Currently add_hwgenerator_randomness() feeds the first 64 bytes of randomness to the "fast init" non-crypto-grade phase. But if add_hwgenerator_randomness() gets called with more than POOL_MIN_BITS of entropy, there's no point in passing it off to the "fast init" stage, since that's enough entropy to bootstrap the real RNG. The "fast init" stage is just there to provide _something_ in the case where we don't have enough entropy to properly bootstrap the RNG. But if we do have enough entropy to bootstrap the RNG, the current logic doesn't serve a purpose. So, in the case where we're passed greater than or equal to POOL_MIN_BITS of entropy, this commit makes us skip the "fast init" phase. Cc: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4c0a12c9052bc..382ff01aeb781 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1125,7 +1125,7 @@ void rand_initialize_disk(struct gendisk *disk) void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { - if (unlikely(crng_init == 0)) { + if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { size_t ret = crng_pre_init_inject(buffer, count, true); mix_pool_bytes(buffer, ret); count -= ret; -- GitLab From 25727cbbe9b462ca2ccb9a9de9e122e32328ab8e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 22 Mar 2022 21:43:12 -0600 Subject: [PATCH 0226/4335] random: treat bootloader trust toggle the same way as cpu trust toggle commit d97c68d178fbf8aaaf21b69b446f2dfb13909316 upstream. If CONFIG_RANDOM_TRUST_CPU is set, the RNG initializes using RDRAND. But, the user can disable (or enable) this behavior by setting `random.trust_cpu=0/1` on the kernel command line. This allows system builders to do reasonable things while avoiding howls from tinfoil hatters. (Or vice versa.) CONFIG_RANDOM_TRUST_BOOTLOADER is basically the same thing, but regards the seed passed via EFI or device tree, which might come from RDRAND or a TPM or somewhere else. In order to allow distros to more easily enable this while avoiding those same howls (or vice versa), this commit adds the corresponding `random.trust_bootloader=0/1` toggle. Cc: Theodore Ts'o Cc: Graham Christensen Reviewed-by: Ard Biesheuvel Reviewed-by: Dominik Brodowski Link: https://github.com/NixOS/nixpkgs/pull/165355 Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ drivers/char/Kconfig | 3 ++- drivers/char/random.c | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bb25aae698609..fd3d14dabadcb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4307,6 +4307,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of a + seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. + randomize_kstack_offset= [KNL] Enable or disable kernel stack offset randomization, which provides roughly 5 bits of diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 740811893c570..55f48375e3fe5 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -449,6 +449,7 @@ config RANDOM_TRUST_BOOTLOADER device randomness. Say Y here to assume the entropy provided by the booloader is trustworthy so it will be added to the kernel's entropy pool. Otherwise, say N here so it will be regarded as device input that - only mixes the entropy pool. + only mixes the entropy pool. This can also be configured at boot with + "random.trust_bootloader=on/off". endmenu diff --git a/drivers/char/random.c b/drivers/char/random.c index 382ff01aeb781..fd7b234c2d3ec 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -945,11 +945,17 @@ static bool drain_entropy(void *buf, size_t nbytes) **********************************************************************/ static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } +static int __init parse_trust_bootloader(char *arg) +{ + return kstrtobool(arg, &trust_bootloader); +} early_param("random.trust_cpu", parse_trust_cpu); +early_param("random.trust_bootloader", parse_trust_bootloader); /* * The first collection of entropy occurs at system boot while interrupts @@ -1157,7 +1163,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); */ void add_bootloader_randomness(const void *buf, size_t size) { - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + if (trust_bootloader) add_hwgenerator_randomness(buf, size, size * 8); else add_device_randomness(buf, size); -- GitLab From 3967a200367c933bc52b7d32681025f843e77642 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 22 Mar 2022 22:21:52 -0600 Subject: [PATCH 0227/4335] random: re-add removed comment about get_random_{u32,u64} reseeding commit dd7aa36e535797926d8eb311da7151919130139d upstream. The comment about get_random_{u32,u64}() not invoking reseeding got added in an unrelated commit, that then was recently reverted by 0313bc278dac ("Revert "random: block in /dev/urandom""). So this adds that little comment snippet back, and improves the wording a bit too. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index fd7b234c2d3ec..d6b8a14c25b66 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -226,9 +226,10 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * * These interfaces will return the requested number of random bytes * into the given buffer or as a return value. This is equivalent to - * a read from /dev/urandom. The integer family of functions may be - * higher performance for one-off random integers, because they do a - * bit of buffering. + * a read from /dev/urandom. The u32, u64, int, and long family of + * functions may be higher performance for one-off random integers, + * because they do a bit of buffering and do not invoke reseeding + * until the buffer is emptied. * *********************************************************************/ -- GitLab From 636b057e2a1a7b64333c5f5ce95401a096e59823 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 31 Mar 2022 11:01:01 -0400 Subject: [PATCH 0228/4335] random: mix build-time latent entropy into pool at init commit 1754abb3e7583c570666fa1e1ee5b317e88c89a0 upstream. Prior, the "input_pool_data" array needed no real initialization, and so it was easy to mark it with __latent_entropy to populate it during compile-time. In switching to using a hash function, this required us to specifically initialize it to some specific state, which means we dropped the __latent_entropy attribute. An unfortunate side effect was this meant the pool was no longer seeded using compile-time random data. In order to bring this back, we declare an array in rand_initialize() with __latent_entropy and call mix_pool_bytes() on that at init, which accomplishes the same thing as before. We make this __initconst, so that it doesn't take up space at runtime after init. Fixes: 6e8ec2552c7d ("random: use computational hash for entropy extraction") Reviewed-by: Dominik Brodowski Reviewed-by: Theodore Ts'o Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index d6b8a14c25b66..e21e645836c5a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -972,6 +972,11 @@ int __init rand_initialize(void) bool arch_init = true; unsigned long rv; +#if defined(LATENT_ENTROPY_PLUGIN) + static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; + _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); +#endif + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { -- GitLab From be7561767ce84ceb1a902b190e1715b3f4719932 Mon Sep 17 00:00:00 2001 From: Jan Varho Date: Mon, 4 Apr 2022 19:42:30 +0300 Subject: [PATCH 0229/4335] random: do not split fast init input in add_hwgenerator_randomness() commit 527a9867af29ff89f278d037db704e0ed50fb666 upstream. add_hwgenerator_randomness() tries to only use the required amount of input for fast init, but credits all the entropy, rather than a fraction of it. Since it's hard to determine how much entropy is left over out of a non-unformly random sample, either give it all to fast init or credit it, but don't attempt to do both. In the process, we can clean up the injection code to no longer need to return a value. Signed-off-by: Jan Varho [Jason: expanded commit message] Fixes: 73c7733f122e ("random: do not throw away excess input to crng_fast_load") Cc: stable@vger.kernel.org # 5.17+, requires af704c856e88 Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e21e645836c5a..ae38e4edcbd73 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -439,11 +439,8 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * This shouldn't be set by functions like add_device_randomness(), * where we can't trust the buffer passed to it is guaranteed to be * unpredictable (so it might not have any entropy at all). - * - * Returns the number of bytes processed from input, which is bounded - * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_pre_init_inject(const void *input, size_t len, bool account) +static void crng_pre_init_inject(const void *input, size_t len, bool account) { static int crng_init_cnt = 0; struct blake2s_state hash; @@ -454,18 +451,15 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) spin_lock_irqsave(&base_crng.lock, flags); if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; + return; } - if (account) - len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); blake2s_update(&hash, input, len); blake2s_final(&hash, base_crng.key); if (account) { - crng_init_cnt += len; + crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { ++base_crng.generation; crng_init = 1; @@ -476,8 +470,6 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) if (crng_init == 1) pr_notice("fast init done\n"); - - return len; } static void _get_random_bytes(void *buf, size_t nbytes) @@ -1138,12 +1130,9 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { - size_t ret = crng_pre_init_inject(buffer, count, true); - mix_pool_bytes(buffer, ret); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; + crng_pre_init_inject(buffer, count, true); + mix_pool_bytes(buffer, count); + return; } /* -- GitLab From e7e196e1ae2603a2c5f1894f1868a7a5b5a2c5e0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 5 Apr 2022 16:40:51 +0200 Subject: [PATCH 0230/4335] random: do not allow user to keep crng key around on stack commit aba120cc101788544aa3e2c30c8da88513892350 upstream. The fast key erasure RNG design relies on the key that's used to be used and then discarded. We do this, making judicious use of memzero_explicit(). However, reads to /dev/urandom and calls to getrandom() involve a copy_to_user(), and userspace can use FUSE or userfaultfd, or make a massive call, dynamically remap memory addresses as it goes, and set the process priority to idle, in order to keep a kernel stack alive indefinitely. By probing /proc/sys/kernel/random/entropy_avail to learn when the crng key is refreshed, a malicious userspace could mount this attack every 5 minutes thereafter, breaking the crng's forward secrecy. In order to fix this, we just overwrite the stack's key with the first 32 bytes of the "free" fast key erasure output. If we're returning <= 32 bytes to the user, then we can still return those bytes directly, so that short reads don't become slower. And for long reads, the difference is hopefully lost in the amortization, so it doesn't change much, with that amortization helping variously for medium reads. We don't need to do this for get_random_bytes() and the various kernel-space callers, and later, if we ever switch to always batching, this won't be necessary either, so there's no need to change the API of these functions. Cc: Theodore Ts'o Reviewed-by: Jann Horn Fixes: c92e040d575a ("random: add backtracking protection to the CRNG") Fixes: 186873c549df ("random: use simpler fast key erasure flow on per-cpu keys") Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ae38e4edcbd73..ab366f50cd9c6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -534,19 +534,29 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (!nbytes) return 0; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); - - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; + /* + * Immediately overwrite the ChaCha key at index 4 with random + * bytes, in case userspace causes copy_to_user() below to sleep + * forever, so that we still retain forward secrecy in that case. + */ + crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); + /* + * However, if we're doing a read of len <= 32, we don't need to + * use chacha_state after, so we can simply return those bytes to + * the user directly. + */ + if (nbytes <= CHACHA_KEY_SIZE) { + ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + goto out_zero_chacha; + } - while (nbytes) { + do { if (large_request && need_resched()) { - if (signal_pending(current)) + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; break; + } schedule(); } @@ -563,10 +573,11 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) nbytes -= len; buf += len; ret += len; - } + } while (nbytes); - memzero_explicit(chacha_state, sizeof(chacha_state)); memzero_explicit(output, sizeof(output)); +out_zero_chacha: + memzero_explicit(chacha_state, sizeof(chacha_state)); return ret; } -- GitLab From 02c2e2ca36101ab650f9444ab9e7fe5b8b00bbf3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 5 Apr 2022 18:39:31 +0200 Subject: [PATCH 0231/4335] random: check for signal_pending() outside of need_resched() check commit 1448769c9cdb69ad65287f4f7ab58bc5f2f5d7ba upstream. signal_pending() checks TIF_NOTIFY_SIGNAL and TIF_SIGPENDING, which signal that the task should bail out of the syscall when possible. This is a separate concept from need_resched(), which checks TIF_NEED_RESCHED, signaling that the task should preempt. In particular, with the current code, the signal_pending() bailout probably won't work reliably. Change this to look like other functions that read lots of data, such as read_zero(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ab366f50cd9c6..4ac2131098128 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -551,13 +551,13 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) } do { - if (large_request && need_resched()) { + if (large_request) { if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; break; } - schedule(); + cond_resched(); } chacha20_block(chacha_state, output); -- GitLab From ccaaff57ed50301189cb0910d48f3a112abba804 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 6 Apr 2022 02:36:16 +0200 Subject: [PATCH 0232/4335] random: check for signals every PAGE_SIZE chunk of /dev/[u]random commit e3c1c4fd9e6d14059ed93ebfe15e1c57793b1a05 upstream. In 1448769c9cdb ("random: check for signal_pending() outside of need_resched() check"), Jann pointed out that we previously were only checking the TIF_NOTIFY_SIGNAL and TIF_SIGPENDING flags if the process had TIF_NEED_RESCHED set, which meant in practice, super long reads to /dev/[u]random would delay signal handling by a long time. I tried this using the below program, and indeed I wasn't able to interrupt a /dev/urandom read until after several megabytes had been read. The bug he fixed has always been there, and so code that reads from /dev/urandom without checking the return value of read() has mostly worked for a long time, for most sizes, not just for <= 256. Maybe it makes sense to keep that code working. The reason it was so small prior, ignoring the fact that it didn't work anyway, was likely because /dev/random used to block, and that could happen for pretty large lengths of time while entropy was gathered. But now, it's just a chacha20 call, which is extremely fast and is just operating on pure data, without having to wait for some external event. In that sense, /dev/[u]random is a lot more like /dev/zero. Taking a page out of /dev/zero's read_zero() function, it always returns at least one chunk, and then checks for signals after each chunk. Chunk sizes there are of length PAGE_SIZE. Let's just copy the same thing for /dev/[u]random, and check for signals and cond_resched() for every PAGE_SIZE amount of data. This makes the behavior more consistent with expectations, and should mitigate the impact of Jann's fix for the age-old signal check bug. ---- test program ---- #include #include #include #include static unsigned char x[~0U]; static void handle(int) { } int main(int argc, char *argv[]) { pid_t pid = getpid(), child; signal(SIGUSR1, handle); if (!(child = fork())) { for (;;) kill(pid, SIGUSR1); } pause(); printf("interrupted after reading %zd bytes\n", getrandom(x, sizeof(x), 0)); kill(child, SIGTERM); return 0; } Cc: Jann Horn Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4ac2131098128..154865d6108cb 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -525,7 +525,6 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - bool large_request = nbytes > 256; ssize_t ret = 0; size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; @@ -551,15 +550,6 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) } do { - if (large_request) { - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } - cond_resched(); - } - chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; @@ -573,6 +563,13 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) nbytes -= len; buf += len; ret += len; + + BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); + if (!(ret % PAGE_SIZE) && nbytes) { + if (signal_pending(current)) + break; + cond_resched(); + } } while (nbytes); memzero_explicit(output, sizeof(output)); -- GitLab From b8078810e44d487937c814f2d864690cce69da9a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 7 Apr 2022 21:23:08 +0200 Subject: [PATCH 0233/4335] random: allow partial reads if later user copies fail commit 5209aed5137880fa229746cb521f715e55596460 upstream. Rather than failing entirely if a copy_to_user() fails at some point, instead we should return a partial read for the amount that succeeded prior, unless none succeeded at all, in which case we return -EFAULT as before. This makes it consistent with other reader interfaces. For example, the following snippet for /dev/zero outputs "4" followed by "1": int fd; void *x = mmap(NULL, 4096, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); assert(x != MAP_FAILED); fd = open("/dev/zero", O_RDONLY); assert(fd >= 0); printf("%zd\n", read(fd, x, 4)); printf("%zd\n", read(fd, x + 4095, 4)); close(fd); This brings that same standard behavior to the various RNG reader interfaces. While we're at it, we can streamline the loop logic a little bit. Suggested-by: Linus Torvalds Cc: Jann Horn Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 154865d6108cb..4ec60f4cb0866 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -525,8 +525,7 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0; - size_t len; + size_t len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; @@ -545,37 +544,40 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * the user directly. */ if (nbytes <= CHACHA_KEY_SIZE) { - ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes); goto out_zero_chacha; } - do { + for (;;) { chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; + left = copy_to_user(buf, output, len); + if (left) { + ret += len - left; break; } - nbytes -= len; buf += len; ret += len; + nbytes -= len; + if (!nbytes) + break; BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); - if (!(ret % PAGE_SIZE) && nbytes) { + if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } - } while (nbytes); + } memzero_explicit(output, sizeof(output)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); - return ret; + return ret ? ret : -EFAULT; } /* -- GitLab From 8b373c113a4caccfb99579f16d2faa63b251737b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:14:57 +0200 Subject: [PATCH 0234/4335] random: make random_get_entropy() return an unsigned long commit b0c3e796f24b588b862b61ce235d3c9417dc8983 upstream. Some implementations were returning type `unsigned long`, while others that fell back to get_cycles() were implicitly returning a `cycles_t` or an untyped constant int literal. That makes for weird and confusing code, and basically all code in the kernel already handled it like it was an `unsigned long`. I recently tried to handle it as the largest type it could be, a `cycles_t`, but doing so doesn't really help with much. Instead let's just make random_get_entropy() return an unsigned long all the time. This also matches the commonly used `arch_get_random_long()` function, so now RDRAND and RDTSC return the same sized integer, which means one can fallback to the other more gracefully. Cc: Dominik Brodowski Cc: Theodore Ts'o Acked-by: Thomas Gleixner Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 20 +++++++------------- include/linux/timex.h | 2 +- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4ec60f4cb0866..8b09d608c8023 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1015,7 +1015,7 @@ int __init rand_initialize(void) */ void add_device_randomness(const void *buf, size_t size) { - cycles_t cycles = random_get_entropy(); + unsigned long cycles = random_get_entropy(); unsigned long flags, now = jiffies; if (crng_init == 0 && size) @@ -1046,8 +1046,7 @@ struct timer_rand_state { */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - cycles_t cycles = random_get_entropy(); - unsigned long flags, now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; spin_lock_irqsave(&input_pool.lock, flags); @@ -1302,8 +1301,7 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; - cycles_t cycles = random_get_entropy(); - unsigned long now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; @@ -1316,16 +1314,12 @@ void add_interrupt_randomness(int irq) if (cycles == 0) cycles = get_reg(fast_pool, regs); - if (sizeof(cycles) == 8) + if (sizeof(unsigned long) == 8) { irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - else { + irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; + } else { irq_data.u32[0] = cycles ^ irq; irq_data.u32[1] = now; - } - - if (sizeof(unsigned long) == 8) - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - else { irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; irq_data.u32[3] = get_reg(fast_pool, regs); } @@ -1372,7 +1366,7 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - cycles_t cycles; + unsigned long cycles; struct timer_list timer; } stack; diff --git a/include/linux/timex.h b/include/linux/timex.h index 059b18eb1f1fa..5745c90c88005 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -75,7 +75,7 @@ * By default we use get_cycles() for this purpose, but individual * architectures may override this in their asm/timex.h header file. */ -#define random_get_entropy() get_cycles() +#define random_get_entropy() ((unsigned long)get_cycles()) #endif /* -- GitLab From 4179671f496bdc26d2c85ffb3cca16f297f6d852 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 18 Apr 2022 20:57:31 +0200 Subject: [PATCH 0235/4335] random: document crng_fast_key_erasure() destination possibility commit 8717627d6ac53251ee012c3c7aca392f29f38a42 upstream. This reverts 35a33ff3807d ("random: use memmove instead of memcpy for remaining 32 bytes"), which was made on a totally bogus basis. The thing it was worried about overlapping came from the stack, not from one of its arguments, as Eric pointed out. But the fact that this confusion even happened draws attention to the fact that it's a bit non-obvious that the random_data parameter can alias chacha_state, and in fact should do so when the caller can't rely on the stack being cleared in a timely manner. So this commit documents that. Reported-by: Eric Biggers Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8b09d608c8023..7035739d1924c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -320,6 +320,13 @@ static void crng_reseed(void) * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. + * + * The returned ChaCha state contains within it a copy of the old + * key value, at index 4, so the state should always be zeroed out + * immediately after using in order to maintain forward secrecy. + * If the state cannot be erased in a timely manner, then it is + * safer to set the random_data parameter to &chacha_state[4] so + * that this function overwrites it before returning. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], -- GitLab From 2814a9e632db40f6591b12f1e8b6b610ee1bd854 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 3 May 2022 21:43:58 +0200 Subject: [PATCH 0236/4335] random: fix sysctl documentation nits commit 069c4ea6871c18bd368f27756e0f91ffb524a788 upstream. A semicolon was missing, and the almost-alphabetical-but-not ordering was confusing, so regroup these by category instead. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 9df263c85aabe..609b891754081 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1014,6 +1014,9 @@ This is a directory, with the following entries: * ``boot_id``: a UUID generated the first time this is retrieved, and unvarying after that; +* ``uuid``: a UUID generated every time this is retrieved (this can + thus be used to generate UUIDs at will); + * ``entropy_avail``: the pool's entropy count, in bits; * ``poolsize``: the entropy pool size, in bits; @@ -1021,10 +1024,7 @@ This is a directory, with the following entries: * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum number of seconds between urandom pool reseeding). This file is writable for compatibility purposes, but writing to it has no effect - on any RNG behavior. - -* ``uuid``: a UUID generated every time this is retrieved (this can - thus be used to generate UUIDs at will); + on any RNG behavior; * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` -- GitLab From 807ed9c29586e6b958e102c4cee793d6e282620e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 5 May 2022 02:20:22 +0200 Subject: [PATCH 0237/4335] init: call time_init() before rand_initialize() commit fe222a6ca2d53c38433cba5d3be62a39099e708e upstream. Currently time_init() is called after rand_initialize(), but rand_initialize() makes use of the timer on various platforms, and sometimes this timer needs to be initialized by time_init() first. In order for random_get_entropy() to not return zero during early boot when it's potentially used as an entropy source, reverse the order of these two calls. The block doing random initialization was right before time_init() before, so changing the order shouldn't have any complicated effects. Cc: Andrew Morton Reviewed-by: Stafford Horne Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- init/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 06b98350ebd24..2b9ba7ea42eca 100644 --- a/init/main.c +++ b/init/main.c @@ -1041,11 +1041,13 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) softirq_init(); timekeeping_init(); kfence_init(); + time_init(); /* * For best initial stack canary entropy, prepare it after: * - setup_arch() for any UEFI RNG entropy and boot cmdline access * - timekeeping_init() for ktime entropy used in rand_initialize() + * - time_init() for making random_get_entropy() work on some platforms * - rand_initialize() to get any arch-specific entropy like RDRAND * - add_latent_entropy() to get any latent entropy * - adding command line entropy @@ -1055,7 +1057,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) add_device_randomness(command_line, strlen(command_line)); boot_init_stack_canary(); - time_init(); perf_event_init(); profile_init(); call_function_init(); -- GitLab From 9f174326e35ceb355e6ac8dcb750797e9daf61f1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0238/4335] ia64: define get_cycles macro for arch-override commit 57c0900b91d8891ab43f0e6b464d059fda51d102 upstream. Itanium defines a get_cycles() function, but it does not do the usual `#define get_cycles get_cycles` dance, making it impossible for generic code to see if an arch-specific function was defined. While the get_cycles() ifdef is not currently used, the following timekeeping patch in this series will depend on the macro existing (or not existing) when defining random_get_entropy(). Cc: Thomas Gleixner Cc: Arnd Bergmann Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/ia64/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 869a3ac6bf23a..7ccc077a60bed 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,6 +39,7 @@ get_cycles (void) ret = ia64_getreg(_IA64_REG_AR_ITC); return ret; } +#define get_cycles get_cycles extern void ia64_cpu_local_tick (void); extern unsigned long long ia64_native_sched_clock (void); -- GitLab From e05efd31b9db39ccd9c489c0f93df7a8e8cb48a8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0239/4335] s390: define get_cycles macro for arch-override commit 2e3df523256cb9836de8441e9c791a796759bb3c upstream. S390x defines a get_cycles() function, but it does not do the usual `#define get_cycles get_cycles` dance, making it impossible for generic code to see if an arch-specific function was defined. While the get_cycles() ifdef is not currently used, the following timekeeping patch in this series will depend on the macro existing (or not existing) when defining random_get_entropy(). Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 50d9b04ecbd14..bc50ee0e91ff1 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -201,6 +201,7 @@ static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; } +#define get_cycles get_cycles int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); -- GitLab From 9bf990cff673ca0747a01ceab0095a71af8033f6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0240/4335] parisc: define get_cycles macro for arch-override commit 8865bbe6ba1120e67f72201b7003a16202cd42be upstream. PA-RISC defines a get_cycles() function, but it does not do the usual `#define get_cycles get_cycles` dance, making it impossible for generic code to see if an arch-specific function was defined. While the get_cycles() ifdef is not currently used, the following timekeeping patch in this series will depend on the macro existing (or not existing) when defining random_get_entropy(). Cc: Thomas Gleixner Cc: Arnd Bergmann Acked-by: Helge Deller Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/timex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h index 06b510f8172e3..b4622cb06a75e 100644 --- a/arch/parisc/include/asm/timex.h +++ b/arch/parisc/include/asm/timex.h @@ -13,9 +13,10 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) +static inline cycles_t get_cycles(void) { return mfctl(16); } +#define get_cycles get_cycles #endif -- GitLab From 8ca78fbdeba0160131a4699747fd32f4f480299b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0241/4335] alpha: define get_cycles macro for arch-override commit 1097710bc9660e1e588cf2186a35db3d95c4d258 upstream. Alpha defines a get_cycles() function, but it does not do the usual `#define get_cycles get_cycles` dance, making it impossible for generic code to see if an arch-specific function was defined. While the get_cycles() ifdef is not currently used, the following timekeeping patch in this series will depend on the macro existing (or not existing) when defining random_get_entropy(). Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Matt Turner Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e9..f89798da8a147 100644 --- a/arch/alpha/include/asm/timex.h +++ b/arch/alpha/include/asm/timex.h @@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void) __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); return ret; } +#define get_cycles get_cycles #endif -- GitLab From bc94ccb2904eb2a0b8c86599790dbfebd34987b7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0242/4335] powerpc: define get_cycles macro for arch-override commit 408835832158df0357e18e96da7f2d1ed6b80e7f upstream. PowerPC defines a get_cycles() function, but it does not do the usual `#define get_cycles get_cycles` dance, making it impossible for generic code to see if an arch-specific function was defined. While the get_cycles() ifdef is not currently used, the following timekeeping patch in this series will depend on the macro existing (or not existing) when defining random_get_entropy(). Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Michael Ellerman Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index fa2e76e4093a3..14b4489de52c5 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void) { return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ -- GitLab From 82f182812f1772ee0d56e82f03f5c252b2bd2140 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 10 Apr 2022 16:49:50 +0200 Subject: [PATCH 0243/4335] timekeeping: Add raw clock fallback for random_get_entropy() commit 1366992e16bddd5e2d9a561687f367f9f802e2e4 upstream. The addition of random_get_entropy_fallback() provides access to whichever time source has the highest frequency, which is useful for gathering entropy on platforms without available cycle counters. It's not necessarily as good as being able to quickly access a cycle counter that the CPU has, but it's still something, even when it falls back to being jiffies-based. In the event that a given arch does not define get_cycles(), falling back to the get_cycles() default implementation that returns 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. Finally, since random_get_entropy_fallback() is used during extremely early boot when randomizing freelists in mm_init(), it can be called before timekeeping has been initialized. In that case there really is nothing we can do; jiffies hasn't even started ticking yet. So just give up and return 0. Suggested-by: Thomas Gleixner Signed-off-by: Jason A. Donenfeld Reviewed-by: Thomas Gleixner Cc: Arnd Bergmann Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- include/linux/timex.h | 8 ++++++++ kernel/time/timekeeping.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/timex.h b/include/linux/timex.h index 5745c90c88005..3871b06bd302c 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -62,6 +62,8 @@ #include #include +unsigned long random_get_entropy_fallback(void); + #include #ifndef random_get_entropy @@ -74,8 +76,14 @@ * * By default we use get_cycles() for this purpose, but individual * architectures may override this in their asm/timex.h header file. + * If a given arch does not have get_cycles(), then we fallback to + * using random_get_entropy_fallback(). */ +#ifdef get_cycles #define random_get_entropy() ((unsigned long)get_cycles()) +#else +#define random_get_entropy() random_get_entropy_fallback() +#endif #endif /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3b1398fbddaf8..871c912860ed5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2380,6 +2381,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) return 0; } +/** + * random_get_entropy_fallback - Returns the raw clock source value, + * used by random.c for platforms with no valid random_get_entropy(). + */ +unsigned long random_get_entropy_fallback(void) +{ + struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; + struct clocksource *clock = READ_ONCE(tkr->clock); + + if (unlikely(timekeeping_suspended || !clock)) + return 0; + return clock->read(clock); +} +EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function -- GitLab From a1428cd7e194740fdd11048b5ba6ab66f31fe7e1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0244/4335] m68k: use fallback for random_get_entropy() instead of zero commit 0f392c95391f2d708b12971a07edaa7973f9eece upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. Cc: Thomas Gleixner Cc: Arnd Bergmann Acked-by: Geert Uytterhoeven Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6a21d93582805..f4a7a340f4cae 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void) { if (mach_random_get_entropy) return mach_random_get_entropy(); - return 0; + return random_get_entropy_fallback(); } #define random_get_entropy random_get_entropy -- GitLab From 2ab416043a94087a8bc28ce748db0d7bce850b43 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0245/4335] riscv: use fallback for random_get_entropy() instead of zero commit 6d01238623faa9425f820353d2066baf6c9dc872 upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Paul Walmsley Acked-by: Palmer Dabbelt Reviewed-by: Palmer Dabbelt Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 507cae273bc62..d6a7428f6248d 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void) static inline unsigned long random_get_entropy(void) { if (unlikely(clint_time_val == NULL)) - return 0; + return random_get_entropy_fallback(); return get_cycles(); } #define random_get_entropy() random_get_entropy() -- GitLab From 9e6db825e87a41622c3103609fb02faa7aa27a9d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0246/4335] mips: use fallback for random_get_entropy() instead of just c0 random commit 1c99c6a7c3c599a68321b01b9ec243215ede5a68 upstream. For situations in which we don't have a c0 counter register available, we've been falling back to reading the c0 "random" register, which is usually bounded by the amount of TLB entries and changes every other cycle or so. This means it wraps extremely often. We can do better by combining this fast-changing counter with a potentially slower-changing counter from random_get_entropy_fallback() in the more significant bits. This commit combines the two, taking into account that the changing bits are in a different bit position depending on the CPU model. In addition, we previously were falling back to 0 for ancient CPUs that Linux does not support anyway; remove that dead path entirely. Cc: Thomas Gleixner Cc: Arnd Bergmann Tested-by: Maciej W. Rozycki Acked-by: Thomas Bogendoerfer Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/timex.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 8026baf46e729..2e107886f97ac 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void) else return 0; /* no usable counter */ } +#define get_cycles get_cycles /* * Like get_cycles - but where c0_count is not available we desperately * use c0_random in an attempt to get at least a little bit of entropy. - * - * R6000 and R6000A neither have a count register nor a random register. - * That leaves no entropy source in the CPU itself. */ static inline unsigned long random_get_entropy(void) { - unsigned int prid = read_c0_prid(); - unsigned int imp = prid & PRID_IMP_MASK; + unsigned int c0_random; - if (can_use_mips_counter(prid)) + if (can_use_mips_counter(read_c0_prid())) return read_c0_count(); - else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A)) - return read_c0_random(); + + if (cpu_has_3kex) + c0_random = (read_c0_random() >> 8) & 0x3f; else - return 0; /* no usable register */ + c0_random = read_c0_random() & 0x3f; + return (random_get_entropy_fallback() << 6) | (0x3f - c0_random); } #define random_get_entropy random_get_entropy -- GitLab From 197d25e068c02230e68c8a043a3909bd4c04fc16 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0247/4335] arm: use fallback for random_get_entropy() instead of zero commit ff8a8f59c99f6a7c656387addc4d9f2247d75077 upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. Cc: Thomas Gleixner Cc: Arnd Bergmann Reviewed-by: Russell King (Oracle) Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c25..6d1337c169cd3 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -11,5 +11,6 @@ typedef unsigned long cycles_t; #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif -- GitLab From 38fbfa404da6118f9a86b7f3681eab09cfbfcb35 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0248/4335] nios2: use fallback for random_get_entropy() instead of zero commit c04e72700f2293013dab40208e809369378f224c upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. Cc: Thomas Gleixner Cc: Arnd Bergmann Acked-by: Dinh Nguyen Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/nios2/include/asm/timex.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h index a769f871b28d9..40a1adc9bd03e 100644 --- a/arch/nios2/include/asm/timex.h +++ b/arch/nios2/include/asm/timex.h @@ -8,5 +8,8 @@ typedef unsigned long cycles_t; extern cycles_t get_cycles(void); +#define get_cycles get_cycles + +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif -- GitLab From e017497815f3e676421a2afb0abe4ae03dbaad2a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0249/4335] x86/tsc: Use fallback for random_get_entropy() instead of zero commit 3bd4abc07a267e6a8b33d7f8717136e18f921c53 upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is suboptimal. Instead, fallback to calling random_get_entropy_fallback(), which isn't extremely high precision or guaranteed to be entropic, but is certainly better than returning zero all the time. If CONFIG_X86_TSC=n, then it's possible for the kernel to run on systems without RDTSC, such as 486 and certain 586, so the fallback code is only required for that case. As well, fix up both the new function and the get_cycles() function from which it was derived to use cpu_feature_enabled() rather than boot_cpu_has(), and use !IS_ENABLED() instead of #ifndef. Signed-off-by: Jason A. Donenfeld Reviewed-by: Thomas Gleixner Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Borislav Petkov Cc: x86@kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/timex.h | 9 +++++++++ arch/x86/include/asm/tsc.h | 7 +++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c1..956e4145311b1 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include #include +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 01a300a9700b9..fbdc3d9514943 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -20,13 +20,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } +#define get_cycles get_cycles extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); -- GitLab From d876dca6edf385e22bd347c22e5d9cbb88ba4225 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0250/4335] um: use fallback for random_get_entropy() instead of zero commit 9f13fb0cd11ed2327abff69f6501a2c124c88b5a upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. This is accomplished by just including the asm-generic code like on other architectures, which means we can get rid of the empty stub function here. Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Richard Weinberger Cc: Anton Ivanov Acked-by: Johannes Berg Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/um/include/asm/timex.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h index e392a9a5bc9bd..9f27176adb26d 100644 --- a/arch/um/include/asm/timex.h +++ b/arch/um/include/asm/timex.h @@ -2,13 +2,8 @@ #ifndef __UM_TIMEX_H #define __UM_TIMEX_H -typedef unsigned long cycles_t; - -static inline cycles_t get_cycles (void) -{ - return 0; -} - #define CLOCK_TICK_RATE (HZ) +#include + #endif -- GitLab From b4582cb351677beab2a1be02f496b1120a8ed30c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0251/4335] sparc: use fallback for random_get_entropy() instead of zero commit ac9756c79797bb98972736b13cfb239fd2cffb79 upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. This is accomplished by just including the asm-generic code like on other architectures, which means we can get rid of the empty stub function here. Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: David S. Miller Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/timex_32.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h index 542915b462097..f86326a6f89e0 100644 --- a/arch/sparc/include/asm/timex_32.h +++ b/arch/sparc/include/asm/timex_32.h @@ -9,8 +9,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -/* XXX Maybe do something better at some point... -DaveM */ -typedef unsigned long cycles_t; -#define get_cycles() (0) +#include #endif -- GitLab From e6bd242523e8c8edb3feee020c78c6252c6b80e3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:03:13 +0200 Subject: [PATCH 0252/4335] xtensa: use fallback for random_get_entropy() instead of zero commit e10e2f58030c5c211d49042a8c2a1b93d40b2ffb upstream. In the event that random_get_entropy() can't access a cycle counter or similar, falling back to returning 0 is really not the best we can do. Instead, at least calling random_get_entropy_fallback() would be preferable, because that always needs to return _something_, even falling back to jiffies eventually. It's not as though random_get_entropy_fallback() is super high precision or guaranteed to be entropic, but basically anything that's not zero all the time is better than returning zero all the time. This is accomplished by just including the asm-generic code like on other architectures, which means we can get rid of the empty stub function here. Cc: Thomas Gleixner Cc: Arnd Bergmann Acked-by: Max Filippov Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/timex.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 233ec75e60c69..3f2462f2d0270 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -29,10 +29,6 @@ extern unsigned long ccount_freq; -typedef unsigned long long cycles_t; - -#define get_cycles() (0) - void local_timer_setup(unsigned cpu); /* @@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare) xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER); } +#include + #endif /* _XTENSA_TIMEX_H */ -- GitLab From 1ab530cf40dcd83233724faaa0bd88c6ab3ca2a5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 12 Apr 2022 19:59:57 +0200 Subject: [PATCH 0253/4335] random: insist on random_get_entropy() existing in order to simplify commit 4b758eda851eb9336ca86a0041a4d3da55f66511 upstream. All platforms are now guaranteed to provide some value for random_get_entropy(). In case some bug leads to this not being so, we print a warning, because that indicates that something is really very wrong (and likely other things are impacted too). This should never be hit, but it's a good and cheap way of finding out if something ever is problematic. Since we now have viable fallback code for random_get_entropy() on all platforms, which is, in the worst case, not worse than jiffies, we can count on getting the best possible value out of it. That means there's no longer a use for using jiffies as entropy input. It also means we no longer have a reason for doing the round-robin register flow in the IRQ handler, which was always of fairly dubious value. Instead we can greatly simplify the IRQ handler inputs and also unify the construction between 64-bits and 32-bits. We now collect the cycle counter and the return address, since those are the two things that matter. Because the return address and the irq number are likely related, to the extent we mix in the irq number, we can just xor it into the top unchanging bytes of the return address, rather than the bottom changing bytes of the cycle counter as before. Then, we can do a fixed 2 rounds of SipHash/HSipHash. Finally, we use the same construction of hashing only half of the [H]SipHash state on 32-bit and 64-bit. We're not actually discarding any entropy, since that entropy is carried through until the next time. And more importantly, it lets us do the same sponge-like construction everywhere. Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 86 +++++++++++++------------------------------ 1 file changed, 26 insertions(+), 60 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 7035739d1924c..f04cf5d8bcf78 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1022,15 +1022,14 @@ int __init rand_initialize(void) */ void add_device_randomness(const void *buf, size_t size) { - unsigned long cycles = random_get_entropy(); - unsigned long flags, now = jiffies; + unsigned long entropy = random_get_entropy(); + unsigned long flags; if (crng_init == 0 && size) crng_pre_init_inject(buf, size, false); spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&cycles, sizeof(cycles)); - _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(buf, size); spin_unlock_irqrestore(&input_pool.lock, flags); } @@ -1053,12 +1052,11 @@ struct timer_rand_state { */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - unsigned long cycles = random_get_entropy(), now = jiffies, flags; + unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&cycles, sizeof(cycles)); - _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(&num, sizeof(num)); spin_unlock_irqrestore(&input_pool.lock, flags); @@ -1186,7 +1184,6 @@ struct fast_pool { unsigned long pool[4]; unsigned long last; unsigned int count; - u16 reg_idx; }; static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { @@ -1204,13 +1201,13 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { * This is [Half]SipHash-1-x, starting from an empty key. Because * the key is fixed, it assumes that its inputs are non-malicious, * and therefore this has no security on its own. s represents the - * 128 or 256-bit SipHash state, while v represents a 128-bit input. + * four-word SipHash state, while v represents a two-word input. */ -static void fast_mix(unsigned long s[4], const unsigned long *v) +static void fast_mix(unsigned long s[4], const unsigned long v[2]) { size_t i; - for (i = 0; i < 16 / sizeof(long); ++i) { + for (i = 0; i < 2; ++i) { s[3] ^= v[i]; #ifdef CONFIG_64BIT s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); @@ -1250,33 +1247,17 @@ int random_online_cpu(unsigned int cpu) } #endif -static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - unsigned long *ptr = (unsigned long *)regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) - idx = 0; - ptr += idx++; - WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - static void mix_interrupt_randomness(struct work_struct *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* - * The size of the copied stack pool is explicitly 16 bytes so that we - * tax mix_pool_byte()'s compression function the same amount on all - * platforms. This means on 64-bit we copy half the pool into this, - * while on 32-bit we copy all of it. The entropy is supposed to be - * sufficiently dispersed between bits that in the sponge-like - * half case, on average we don't wind up "losing" some. + * The size of the copied stack pool is explicitly 2 longs so that we + * only ever ingest half of the siphash output each time, retaining + * the other half as the next "key" that carries over. The entropy is + * supposed to be sufficiently dispersed between bits so on average + * we don't wind up "losing" some. */ - u8 pool[16]; + unsigned long pool[2]; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); @@ -1308,36 +1289,21 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; - unsigned long cycles = random_get_entropy(), now = jiffies; + unsigned long entropy = random_get_entropy(); struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; - union { - u32 u32[4]; - u64 u64[2]; - unsigned long longs[16 / sizeof(long)]; - } irq_data; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - - if (sizeof(unsigned long) == 8) { - irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - } else { - irq_data.u32[0] = cycles ^ irq; - irq_data.u32[1] = now; - irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; - irq_data.u32[3] = get_reg(fast_pool, regs); - } - fast_mix(fast_pool->pool, irq_data.longs); + fast_mix(fast_pool->pool, (unsigned long[2]){ + entropy, + (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq) + }); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || + if (new_count < 64 && (!time_is_before_jiffies(fast_pool->last + HZ) || unlikely(crng_init == 0))) return; @@ -1373,28 +1339,28 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - unsigned long cycles; + unsigned long entropy; struct timer_list timer; } stack; - stack.cycles = random_get_entropy(); + stack.entropy = random_get_entropy(); /* Slow counter - or none. Don't even bother */ - if (stack.cycles == random_get_entropy()) + if (stack.entropy == random_get_entropy()) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready() && !signal_pending(current)) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies + 1); - mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); schedule(); - stack.cycles = random_get_entropy(); + stack.entropy = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); } -- GitLab From 0d79a47b4ae06bc4994b9f0d96a41cf48f6d765d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 3 May 2022 14:14:32 +0200 Subject: [PATCH 0254/4335] random: do not use batches when !crng_ready() commit cbe89e5a375a51bbb952929b93fa973416fea74e upstream. It's too hard to keep the batches synchronized, and pointless anyway, since in !crng_ready(), we're updating the base_crng key really often, where batching only hurts. So instead, if the crng isn't ready, just call into get_random_bytes(). At this stage nothing is performance critical anyhow. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index f04cf5d8bcf78..01097606fed7f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -467,10 +467,8 @@ static void crng_pre_init_inject(const void *input, size_t len, bool account) if (account) { crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) crng_init = 1; - } } spin_unlock_irqrestore(&base_crng.lock, flags); @@ -626,6 +624,11 @@ u64 get_random_u64(void) warn_unseeded_randomness(&previous); + if (!crng_ready()) { + _get_random_bytes(&ret, sizeof(ret)); + return ret; + } + local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); @@ -660,6 +663,11 @@ u32 get_random_u32(void) warn_unseeded_randomness(&previous); + if (!crng_ready()) { + _get_random_bytes(&ret, sizeof(ret)); + return ret; + } + local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); -- GitLab From 0d24003b5fde78913a957e6db6a98df280e57c4c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 30 Apr 2022 15:08:20 +0200 Subject: [PATCH 0255/4335] random: use first 128 bits of input as fast init commit 5c3b747ef54fa2a7318776777f6044540d99f721 upstream. Before, the first 64 bytes of input, regardless of how entropic it was, would be used to mutate the crng base key directly, and none of those bytes would be credited as having entropy. Then 256 bits of credited input would be accumulated, and only then would the rng transition from the earlier "fast init" phase into being actually initialized. The thinking was that by mixing and matching fast init and real init, an attacker who compromised the fast init state, considered easy to do given how little entropy might be in those first 64 bytes, would then be able to bruteforce bits from the actual initialization. By keeping these separate, bruteforcing became impossible. However, by not crediting potentially creditable bits from those first 64 bytes of input, we delay initialization, and actually make the problem worse, because it means the user is drawing worse random numbers for a longer period of time. Instead, we can take the first 128 bits as fast init, and allow them to be credited, and then hold off on the next 128 bits until they've accumulated. This is still a wide enough margin to prevent bruteforcing the rng state, while still initializing much faster. Then, rather than trying to piecemeal inject into the base crng key at various points, instead just extract from the pool when we need it, for the crng_init==0 phase. Performance may even be better for the various inputs here, since there are likely more calls to mix_pool_bytes() then there are to get_random_bytes() during this phase of system execution. Since the preinit injection code is gone, bootloader randomness can then do something significantly more straight forward, removing the weird system_wq hack in hwgenerator randomness. Cc: Theodore Ts'o Cc: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 146 ++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 97 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 01097606fed7f..9beba425c5080 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -233,10 +233,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * *********************************************************************/ -enum { - CRNG_RESEED_INTERVAL = 300 * HZ, - CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE -}; +enum { CRNG_RESEED_INTERVAL = 300 * HZ }; static struct { u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); @@ -260,6 +257,8 @@ static DEFINE_PER_CPU(struct crng, crngs) = { /* Used by crng_reseed() to extract a new seed from the input pool. */ static bool drain_entropy(void *buf, size_t nbytes); +/* Used by crng_make_state() to extract a new seed when crng_init==0. */ +static void extract_entropy(void *buf, size_t nbytes); /* * This extracts a new crng key from the input pool, but only if there is a @@ -384,17 +383,20 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], /* * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not - * ready, we do fast key erasure with the base_crng directly, because - * this is what crng_pre_init_inject() mutates during early init. + * ready, we do fast key erasure with the base_crng directly, extracting + * when crng_init==0. */ if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); - if (!ready) + if (!ready) { + if (crng_init == 0) + extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); + } spin_unlock_irqrestore(&base_crng.lock, flags); if (!ready) return; @@ -435,48 +437,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], local_unlock_irqrestore(&crngs.lock, flags); } -/* - * This function is for crng_init == 0 only. It loads entropy directly - * into the crng's key, without going through the input pool. It is, - * generally speaking, not very safe, but we use this only at early - * boot time when it's better to have something there rather than - * nothing. - * - * If account is set, then the crng_init_cnt counter is incremented. - * This shouldn't be set by functions like add_device_randomness(), - * where we can't trust the buffer passed to it is guaranteed to be - * unpredictable (so it might not have any entropy at all). - */ -static void crng_pre_init_inject(const void *input, size_t len, bool account) -{ - static int crng_init_cnt = 0; - struct blake2s_state hash; - unsigned long flags; - - blake2s_init(&hash, sizeof(base_crng.key)); - - spin_lock_irqsave(&base_crng.lock, flags); - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; - } - - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, input, len); - blake2s_final(&hash, base_crng.key); - - if (account) { - crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) - crng_init = 1; - } - - spin_unlock_irqrestore(&base_crng.lock, flags); - - if (crng_init == 1) - pr_notice("fast init done\n"); -} - static void _get_random_bytes(void *buf, size_t nbytes) { u32 chacha_state[CHACHA_STATE_WORDS]; @@ -789,7 +749,8 @@ EXPORT_SYMBOL(get_random_bytes_arch); enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ + POOL_MIN_BITS = POOL_BITS, /* No point in settling for less. */ + POOL_FAST_INIT_BITS = POOL_MIN_BITS / 2 }; /* For notifying userspace should write into /dev/random. */ @@ -826,24 +787,6 @@ static void mix_pool_bytes(const void *in, size_t nbytes) spin_unlock_irqrestore(&input_pool.lock, flags); } -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (!crng_ready() && entropy_count >= POOL_MIN_BITS) - crng_reseed(); -} - /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. @@ -909,6 +852,33 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; + unsigned long flags; + + if (!nbits) + return; + + add = min_t(size_t, nbits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (!crng_ready() && entropy_count >= POOL_MIN_BITS) + crng_reseed(); + else if (unlikely(crng_init == 0 && entropy_count >= POOL_FAST_INIT_BITS)) { + spin_lock_irqsave(&base_crng.lock, flags); + if (crng_init == 0) { + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_init = 1; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + } +} + /********************************************************************** * @@ -951,9 +921,9 @@ static bool drain_entropy(void *buf, size_t nbytes) * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. * - * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or - * add_device_randomness(), depending on whether or not the configuration - * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * add_bootloader_randomness() is called by bootloader drivers, such as EFI + * and device tree, and credits its input depending on whether or not the + * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set. * * add_interrupt_randomness() uses the interrupt timing as random * inputs to the entropy pool. Using the cycle counters and the irq source @@ -1033,9 +1003,6 @@ void add_device_randomness(const void *buf, size_t size) unsigned long entropy = random_get_entropy(); unsigned long flags; - if (crng_init == 0 && size) - crng_pre_init_inject(buf, size, false); - spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(buf, size); @@ -1151,12 +1118,6 @@ void rand_initialize_disk(struct gendisk *disk) void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { - if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { - crng_pre_init_inject(buffer, count, true); - mix_pool_bytes(buffer, count); - return; - } - /* * Throttle writing if we're above the trickle threshold. * We'll be woken up again once below POOL_MIN_BITS, when @@ -1164,7 +1125,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, * CRNG_RESEED_INTERVAL has elapsed. */ wait_event_interruptible_timeout(random_write_wait, - !system_wq || kthread_should_stop() || + kthread_should_stop() || input_pool.entropy_count < POOL_MIN_BITS, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); @@ -1173,17 +1134,14 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* - * Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + * Handle random seed passed by bootloader, and credit it if + * CONFIG_RANDOM_TRUST_BOOTLOADER is set. */ void add_bootloader_randomness(const void *buf, size_t size) { + mix_pool_bytes(buf, size); if (trust_bootloader) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); + credit_entropy_bits(size * 8); } EXPORT_SYMBOL_GPL(add_bootloader_randomness); @@ -1283,13 +1241,8 @@ static void mix_interrupt_randomness(struct work_struct *work) fast_pool->last = jiffies; local_irq_enable(); - if (unlikely(crng_init == 0)) { - crng_pre_init_inject(pool, sizeof(pool), true); - mix_pool_bytes(pool, sizeof(pool)); - } else { - mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); - } + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); memzero_explicit(pool, sizeof(pool)); } @@ -1311,8 +1264,7 @@ void add_interrupt_randomness(int irq) if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && (!time_is_before_jiffies(fast_pool->last + HZ) || - unlikely(crng_init == 0))) + if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ)) return; if (unlikely(!fast_pool->mix.func)) -- GitLab From baf06217704b2597c1a492aa4fc8bd680b2df169 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 30 Apr 2022 22:03:29 +0200 Subject: [PATCH 0256/4335] random: do not pretend to handle premature next security model commit e85c0fc1d94c52483a603651748d4c76d6aa1c6b upstream. Per the thread linked below, "premature next" is not considered to be a realistic threat model, and leads to more serious security problems. "Premature next" is the scenario in which: - Attacker compromises the current state of a fully initialized RNG via some kind of infoleak. - New bits of entropy are added directly to the key used to generate the /dev/urandom stream, without any buffering or pooling. - Attacker then, somehow having read access to /dev/urandom, samples RNG output and brute forces the individual new bits that were added. - Result: the RNG never "recovers" from the initial compromise, a so-called violation of what academics term "post-compromise security". The usual solutions to this involve some form of delaying when entropy gets mixed into the crng. With Fortuna, this involves multiple input buckets. With what the Linux RNG was trying to do prior, this involves entropy estimation. However, by delaying when entropy gets mixed in, it also means that RNG compromises are extremely dangerous during the window of time before the RNG has gathered enough entropy, during which time nonces may become predictable (or repeated), ephemeral keys may not be secret, and so forth. Moreover, it's unclear how realistic "premature next" is from an attack perspective, if these attacks even make sense in practice. Put together -- and discussed in more detail in the thread below -- these constitute grounds for just doing away with the current code that pretends to handle premature next. I say "pretends" because it wasn't doing an especially great job at it either; should we change our mind about this direction, we would probably implement Fortuna to "fix" the "problem", in which case, removing the pretend solution still makes sense. This also reduces the crng reseed period from 5 minutes down to 1 minute. The rationale from the thread might lead us toward reducing that even further in the future (or even eliminating it), but that remains a topic of a future commit. At a high level, this patch changes semantics from: Before: Seed for the first time after 256 "bits" of estimated entropy have been accumulated since the system booted. Thereafter, reseed once every five minutes, but only if 256 new "bits" have been accumulated since the last reseeding. After: Seed for the first time after 256 "bits" of estimated entropy have been accumulated since the system booted. Thereafter, reseed once every minute. Most of this patch is renaming and removing: POOL_MIN_BITS becomes POOL_INIT_BITS, credit_entropy_bits() becomes credit_init_bits(), crng_reseed() loses its "force" parameter since it's now always true, the drain_entropy() function no longer has any use so it's removed, entropy estimation is skipped if we've already init'd, the various notifiers for "low on entropy" are now only active prior to init, and finally, some documentation comments are cleaned up here and there. Link: https://lore.kernel.org/lkml/YmlMGx6+uigkGiZ0@zx2c4.com/ Cc: Theodore Ts'o Cc: Nadia Heninger Cc: Tom Ristenpart Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 174 +++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 112 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9beba425c5080..5be537f80baba 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -15,14 +15,12 @@ * - Sysctl interface. * * The high level overview is that there is one input pool, into which - * various pieces of data are hashed. Some of that data is then "credited" as - * having a certain number of bits of entropy. When enough bits of entropy are - * available, the hash is finalized and handed as a key to a stream cipher that - * expands it indefinitely for various consumers. This key is periodically - * refreshed as the various entropy collectors, described below, add data to the - * input pool and credit it. There is currently no Fortuna-like scheduler - * involved, which can lead to malicious entropy sources causing a premature - * reseed, and the entropy estimates are, at best, conservative guesses. + * various pieces of data are hashed. Prior to initialization, some of that + * data is then "credited" as having a certain number of bits of entropy. + * When enough bits of entropy are available, the hash is finalized and + * handed as a key to a stream cipher that expands it indefinitely for + * various consumers. This key is periodically refreshed as the various + * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -233,7 +231,10 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * *********************************************************************/ -enum { CRNG_RESEED_INTERVAL = 300 * HZ }; +enum { + CRNG_RESEED_START_INTERVAL = HZ, + CRNG_RESEED_INTERVAL = 60 * HZ +}; static struct { u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); @@ -255,16 +256,10 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -/* Used by crng_reseed() to extract a new seed from the input pool. */ -static bool drain_entropy(void *buf, size_t nbytes); -/* Used by crng_make_state() to extract a new seed when crng_init==0. */ +/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ static void extract_entropy(void *buf, size_t nbytes); -/* - * This extracts a new crng key from the input pool, but only if there is a - * sufficient amount of entropy available, in order to mitigate bruteforcing - * of newly added bits. - */ +/* This extracts a new crng key from the input pool. */ static void crng_reseed(void) { unsigned long flags; @@ -272,9 +267,7 @@ static void crng_reseed(void) u8 key[CHACHA_KEY_SIZE]; bool finalize_init = false; - /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */ - if (!drain_entropy(key, sizeof(key))) - return; + extract_entropy(key, sizeof(key)); /* * We copy the new key into the base_crng, overwriting the old one, @@ -346,10 +339,10 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], } /* - * Return whether the crng seed is considered to be sufficiently - * old that a reseeding might be attempted. This happens if the last - * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at - * an interval proportional to the uptime. + * Return whether the crng seed is considered to be sufficiently old + * that a reseeding is needed. This happens if the last reseeding + * was CRNG_RESEED_INTERVAL ago, or during early boot, at an interval + * proportional to the uptime. */ static bool crng_has_old_seed(void) { @@ -361,7 +354,7 @@ static bool crng_has_old_seed(void) if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) WRITE_ONCE(early_boot, false); else - interval = max_t(unsigned int, 5 * HZ, + interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); @@ -403,8 +396,8 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], } /* - * If the base_crng is old enough, we try to reseed, which in turn - * bumps the generation counter that we check below. + * If the base_crng is old enough, we reseed, which in turn bumps the + * generation counter that we check below. */ if (unlikely(crng_has_old_seed())) crng_reseed(); @@ -736,30 +729,24 @@ EXPORT_SYMBOL(get_random_bytes_arch); * * After which, if added entropy should be credited: * - * static void credit_entropy_bits(size_t nbits) + * static void credit_init_bits(size_t nbits) * - * Finally, extract entropy via these two, with the latter one - * setting the entropy count to zero and extracting only if there - * is POOL_MIN_BITS entropy credited prior: + * Finally, extract entropy via: * * static void extract_entropy(void *buf, size_t nbytes) - * static bool drain_entropy(void *buf, size_t nbytes) * **********************************************************************/ enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS, /* No point in settling for less. */ - POOL_FAST_INIT_BITS = POOL_MIN_BITS / 2 + POOL_INIT_BITS = POOL_BITS, /* No point in settling for less. */ + POOL_FAST_INIT_BITS = POOL_INIT_BITS / 2 }; -/* For notifying userspace should write into /dev/random. */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); - static struct { struct blake2s_state hash; spinlock_t lock; - unsigned int entropy_count; + unsigned int init_bits; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, @@ -774,9 +761,9 @@ static void _mix_pool_bytes(const void *in, size_t nbytes) } /* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. + * This function adds bytes into the input pool. It does not + * update the initialization bit counter; the caller should call + * credit_init_bits if this is appropriate. */ static void mix_pool_bytes(const void *in, size_t nbytes) { @@ -833,43 +820,24 @@ static void extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } -/* - * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we - * set the entropy count to zero (but don't actually touch any data). Only then - * can we extract a new key with extract_entropy(). - */ -static bool drain_entropy(void *buf, size_t nbytes) -{ - unsigned int entropy_count; - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return false; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf, nbytes); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - return true; -} - -static void credit_entropy_bits(size_t nbits) +static void credit_init_bits(size_t nbits) { - unsigned int entropy_count, orig, add; + unsigned int init_bits, orig, add; unsigned long flags; - if (!nbits) + if (crng_ready() || !nbits) return; add = min_t(size_t, nbits, POOL_BITS); do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + orig = READ_ONCE(input_pool.init_bits); + init_bits = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.init_bits, orig, init_bits) != orig); - if (!crng_ready() && entropy_count >= POOL_MIN_BITS) + if (!crng_ready() && init_bits >= POOL_INIT_BITS) crng_reseed(); - else if (unlikely(crng_init == 0 && entropy_count >= POOL_FAST_INIT_BITS)) { + else if (unlikely(crng_init == 0 && init_bits >= POOL_FAST_INIT_BITS)) { spin_lock_irqsave(&base_crng.lock, flags); if (crng_init == 0) { extract_entropy(base_crng.key, sizeof(base_crng.key)); @@ -975,13 +943,10 @@ int __init rand_initialize(void) _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(utsname(), sizeof(*(utsname()))); - extract_entropy(base_crng.key, sizeof(base_crng.key)); - ++base_crng.generation; - - if (arch_init && trust_cpu && !crng_ready()) { - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } + if (crng_ready()) + crng_reseed(); + else if (arch_init && trust_cpu) + credit_init_bits(BLAKE2S_BLOCK_SIZE * 8); if (ratelimit_disable) { urandom_warning.interval = 0; @@ -1035,6 +1000,9 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu _mix_pool_bytes(&num, sizeof(num)); spin_unlock_irqrestore(&input_pool.lock, flags); + if (crng_ready()) + return; + /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas @@ -1065,7 +1033,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. */ - credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11)); + credit_init_bits(min_t(unsigned int, fls(delta >> 1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, @@ -1118,18 +1086,15 @@ void rand_initialize_disk(struct gendisk *disk) void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { + mix_pool_bytes(buffer, count); + credit_init_bits(entropy); + /* - * Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below POOL_MIN_BITS, when - * the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has elapsed. + * Throttle writing to once every CRNG_RESEED_INTERVAL, unless + * we're not yet initialized. */ - wait_event_interruptible_timeout(random_write_wait, - kthread_should_stop() || - input_pool.entropy_count < POOL_MIN_BITS, - CRNG_RESEED_INTERVAL); - mix_pool_bytes(buffer, count); - credit_entropy_bits(entropy); + if (!kthread_should_stop() && crng_ready()) + schedule_timeout_interruptible(CRNG_RESEED_INTERVAL); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); @@ -1141,7 +1106,7 @@ void add_bootloader_randomness(const void *buf, size_t size) { mix_pool_bytes(buf, size); if (trust_bootloader) - credit_entropy_bits(size * 8); + credit_init_bits(size * 8); } EXPORT_SYMBOL_GPL(add_bootloader_randomness); @@ -1242,7 +1207,7 @@ static void mix_interrupt_randomness(struct work_struct *work) local_irq_enable(); mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); + credit_init_bits(1); memzero_explicit(pool, sizeof(pool)); } @@ -1289,7 +1254,7 @@ EXPORT_SYMBOL_GPL(add_interrupt_randomness); */ static void entropy_timer(struct timer_list *t) { - credit_entropy_bits(1); + credit_init_bits(1); } /* @@ -1382,16 +1347,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, static __poll_t random_poll(struct file *file, poll_table *wait) { - __poll_t mask; - poll_wait(file, &crng_init_wait, wait); - poll_wait(file, &random_write_wait, wait); - mask = 0; - if (crng_ready()) - mask |= EPOLLIN | EPOLLRDNORM; - if (input_pool.entropy_count < POOL_MIN_BITS) - mask |= EPOLLOUT | EPOLLWRNORM; - return mask; + return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } static int write_pool(const char __user *ubuf, size_t count) @@ -1464,7 +1421,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* Inherently racy, no point locking. */ - if (put_user(input_pool.entropy_count, p)) + if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1474,7 +1431,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EFAULT; if (ent_count < 0) return -EINVAL; - credit_entropy_bits(ent_count); + credit_init_bits(ent_count); return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) @@ -1488,20 +1445,13 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) retval = write_pool((const char __user *)p, size); if (retval < 0) return retval; - credit_entropy_bits(ent_count); + credit_init_bits(ent_count); return 0; case RNDZAPENTCNT: case RNDCLEARPOOL: - /* - * Clear the entropy pool counters. We no longer clear - * the entropy pool, as that's silly. - */ + /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) @@ -1560,7 +1510,7 @@ const struct file_operations urandom_fops = { * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting - * more entropy, tied to the POOL_MIN_BITS constant. It is writable + * more entropy, tied to the POOL_INIT_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * @@ -1575,7 +1525,7 @@ const struct file_operations urandom_fops = { #include static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; -static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_random_write_wakeup_bits = POOL_INIT_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; @@ -1632,7 +1582,7 @@ struct ctl_table random_table[] = { }, { .procname = "entropy_avail", - .data = &input_pool.entropy_count, + .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, -- GitLab From df4e319ea60f7586bd17fa2c325c9fa07d1b3459 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 May 2022 18:27:38 +0200 Subject: [PATCH 0257/4335] random: order timer entropy functions below interrupt functions commit a4b5c26b79ffdfcfb816c198f2fc2b1e7b5b580f upstream. There are no code changes here; this is just a reordering of functions, so that in subsequent commits, the timer entropy functions can call into the interrupt ones. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 238 +++++++++++++++++++++--------------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5be537f80baba..b5c58997cd27d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -856,13 +856,13 @@ static void credit_init_bits(size_t nbits) * the above entropy accumulation routines: * * void add_device_randomness(const void *buf, size_t size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_disk_randomness(struct gendisk *disk); * void add_hwgenerator_randomness(const void *buffer, size_t count, * size_t entropy); * void add_bootloader_randomness(const void *buf, size_t size); * void add_interrupt_randomness(int irq); + * void add_input_randomness(unsigned int type, unsigned int code, + * unsigned int value); + * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that * is likely to differ between two devices (or possibly even per boot). @@ -872,19 +872,6 @@ static void credit_init_bits(size_t nbits) * that might otherwise be identical and have very little entropy * available to them (particularly common in the embedded world). * - * add_input_randomness() uses the input layer interrupt timing, as well - * as the event type information from the hardware. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * The above two routines try to estimate how many bits of entropy - * to credit. They do this by keeping track of the first and second - * order deltas of the event timings. - * * add_hwgenerator_randomness() is for true hardware RNGs, and will credit * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. @@ -898,6 +885,19 @@ static void credit_init_bits(size_t nbits) * as inputs, it feeds the input pool roughly once a second or after 64 * interrupts, crediting 1 bit of entropy for whichever comes first. * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The last two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * **********************************************************************/ static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); @@ -975,109 +975,6 @@ void add_device_randomness(const void *buf, size_t size) } EXPORT_SYMBOL(add_device_randomness); -/* There is one of these per entropy source */ -struct timer_rand_state { - unsigned long last_time; - long last_delta, last_delta2; -}; - -/* - * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. - */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) -{ - unsigned long entropy = random_get_entropy(), now = jiffies, flags; - long delta, delta2, delta3; - - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&entropy, sizeof(entropy)); - _mix_pool_bytes(&num, sizeof(num)); - spin_unlock_irqrestore(&input_pool.lock, flags); - - if (crng_ready()) - return; - - /* - * Calculate number of bits of randomness we probably added. - * We take into account the first, second and third-order deltas - * in order to make our estimate. - */ - delta = now - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, now); - - delta2 = delta - READ_ONCE(state->last_delta); - WRITE_ONCE(state->last_delta, delta); - - delta3 = delta2 - READ_ONCE(state->last_delta2); - WRITE_ONCE(state->last_delta2, delta2); - - if (delta < 0) - delta = -delta; - if (delta2 < 0) - delta2 = -delta2; - if (delta3 < 0) - delta3 = -delta3; - if (delta > delta2) - delta = delta2; - if (delta > delta3) - delta = delta3; - - /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy estimate to 12 bits. - */ - credit_init_bits(min_t(unsigned int, fls(delta >> 1), 11)); -} - -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) -{ - static unsigned char last_value; - static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; - - /* Ignore autorepeat and the like. */ - if (value == last_value) - return; - - last_value = value; - add_timer_randomness(&input_timer_state, - (type << 4) ^ code ^ (code >> 4) ^ value); -} -EXPORT_SYMBOL_GPL(add_input_randomness); - -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* First major is 1, so we get >= 0x200 here. */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); -} -EXPORT_SYMBOL_GPL(add_disk_randomness); - -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - /* * Interface for in-kernel drivers of true hardware RNGs. * Those devices may produce endless random bits and will be throttled @@ -1239,6 +1136,109 @@ void add_interrupt_randomness(int irq) } EXPORT_SYMBOL_GPL(add_interrupt_randomness); +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; + +/* + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. + * + * The number "num" is also added to the pool - it should somehow describe + * the type of event which just happened. This is currently 0-255 for + * keyboard scan codes, and 256 upwards for interrupts. + */ +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) +{ + unsigned long entropy = random_get_entropy(), now = jiffies, flags; + long delta, delta2, delta3; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + + if (crng_ready()) + return; + + /* + * Calculate number of bits of randomness we probably added. + * We take into account the first, second and third-order deltas + * in order to make our estimate. + */ + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); + + delta2 = delta - READ_ONCE(state->last_delta); + WRITE_ONCE(state->last_delta, delta); + + delta3 = delta2 - READ_ONCE(state->last_delta2); + WRITE_ONCE(state->last_delta2, delta2); + + if (delta < 0) + delta = -delta; + if (delta2 < 0) + delta2 = -delta2; + if (delta3 < 0) + delta3 = -delta3; + if (delta > delta2) + delta = delta2; + if (delta > delta3) + delta = delta3; + + /* + * delta is now minimum absolute delta. + * Round down by 1 bit on general principles, + * and limit entropy estimate to 12 bits. + */ + credit_init_bits(min_t(unsigned int, fls(delta >> 1), 11)); +} + +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) +{ + static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; + + /* Ignore autorepeat and the like. */ + if (value == last_value) + return; + + last_value = value; + add_timer_randomness(&input_timer_state, + (type << 4) ^ code ^ (code >> 4) ^ value); +} +EXPORT_SYMBOL_GPL(add_input_randomness); + +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) +{ + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. */ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); +} +EXPORT_SYMBOL_GPL(add_disk_randomness); + +void rand_initialize_disk(struct gendisk *disk) +{ + struct timer_rand_state *state; + + /* + * If kzalloc returns null, we just won't use that entropy + * source. + */ + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; + } +} +#endif + /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another -- GitLab From 4649394c4749d0318bc47ad59ee7cb4b3b2fdac2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 May 2022 18:30:51 +0200 Subject: [PATCH 0258/4335] random: do not use input pool from hard IRQs commit e3e33fc2ea7fcefd0d761db9d6219f83b4248f5c upstream. Years ago, a separate fast pool was added for interrupts, so that the cost associated with taking the input pool spinlocks and mixing into it would be avoided in places where latency is critical. However, one oversight was that add_input_randomness() and add_disk_randomness() still sometimes are called directly from the interrupt handler, rather than being deferred to a thread. This means that some unlucky interrupts will be caught doing a blake2s_compress() call and potentially spinning on input_pool.lock, which can also be taken by unprivileged users by writing into /dev/urandom. In order to fix this, add_timer_randomness() now checks whether it is being called from a hard IRQ and if so, just mixes into the per-cpu IRQ fast pool using fast_mix(), which is much faster and can be done lock-free. A nice consequence of this, as well, is that it means hard IRQ context FPU support is likely no longer useful. The entropy estimation algorithm used by add_timer_randomness() is also somewhat different than the one used for add_interrupt_randomness(). The former looks at deltas of deltas of deltas, while the latter just waits for 64 interrupts for one bit or for one second since the last bit. In order to bridge these, and since add_interrupt_randomness() runs after an add_timer_randomness() that's called from hard IRQ, we add to the fast pool credit the related amount, and then subtract one to account for add_interrupt_randomness()'s contribution. A downside of this, however, is that the num argument is potentially attacker controlled, which puts a bit more pressure on the fast_mix() sponge to do more than it's really intended to do. As a mitigating factor, the first 96 bits of input aren't attacker controlled (a cycle counter followed by zeros), which means it's essentially two rounds of siphash rather than one, which is somewhat better. It's also not that much different from add_interrupt_randomness()'s use of the irq stack instruction pointer register. Cc: Thomas Gleixner Cc: Filipe Manana Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 51 ++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b5c58997cd27d..402b7582df8c4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1086,6 +1086,7 @@ static void mix_interrupt_randomness(struct work_struct *work) * we don't wind up "losing" some. */ unsigned long pool[2]; + unsigned int count; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); @@ -1099,12 +1100,13 @@ static void mix_interrupt_randomness(struct work_struct *work) * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool, sizeof(pool)); + count = fast_pool->count; fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); mix_pool_bytes(pool, sizeof(pool)); - credit_init_bits(1); + credit_init_bits(max(1u, (count & U16_MAX) / 64)); memzero_explicit(pool, sizeof(pool)); } @@ -1144,22 +1146,30 @@ struct timer_rand_state { /* * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. The + * value "num" is also added to the pool; it should somehow describe + * the type of event that just happened. */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; + unsigned int bits; - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&entropy, sizeof(entropy)); - _mix_pool_bytes(&num, sizeof(num)); - spin_unlock_irqrestore(&input_pool.lock, flags); + /* + * If we're in a hard IRQ, add_interrupt_randomness() will be called + * sometime after, so mix into the fast pool. + */ + if (in_hardirq()) { + fast_mix(this_cpu_ptr(&irq_randomness)->pool, + (unsigned long[2]){ entropy, num }); + } else { + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + } if (crng_ready()) return; @@ -1190,11 +1200,22 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu delta = delta3; /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy estimate to 12 bits. + * delta is now minimum absolute delta. Round down by 1 bit + * on general principles, and limit entropy estimate to 11 bits. + */ + bits = min(fls(delta >> 1), 11); + + /* + * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() + * will run after this, which uses a different crediting scheme of 1 bit + * per every 64 interrupts. In order to let that function do accounting + * close to the one in this function, we credit a full 64/64 bit per bit, + * and then subtract one to account for the extra one added. */ - credit_init_bits(min_t(unsigned int, fls(delta >> 1), 11)); + if (in_hardirq()) + this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; + else + credit_init_bits(bits); } void add_input_randomness(unsigned int type, unsigned int code, -- GitLab From 82caef84092e5d065932be53d6b341f22f68c649 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 May 2022 23:19:43 +0200 Subject: [PATCH 0259/4335] random: help compiler out with fast_mix() by using simpler arguments commit 791332b3cbb080510954a4c152ce02af8832eac9 upstream. Now that fast_mix() has more than one caller, gcc no longer inlines it. That's fine. But it also doesn't handle the compound literal argument we pass it very efficiently, nor does it handle the loop as well as it could. So just expand the code to spell out this function so that it generates the same code as it did before. Performance-wise, this now behaves as it did before the last commit. The difference in actual code size on x86 is 45 bytes, which is less than a cache line. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 44 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 402b7582df8c4..607aa0055c665 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1031,25 +1031,30 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { * and therefore this has no security on its own. s represents the * four-word SipHash state, while v represents a two-word input. */ -static void fast_mix(unsigned long s[4], const unsigned long v[2]) +static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { - size_t i; - - for (i = 0; i < 2; ++i) { - s[3] ^= v[i]; #ifdef CONFIG_64BIT - s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); - s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; - s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; - s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); +#define PERM() do { \ + s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); \ + s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; \ + s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; \ + s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); \ +} while (0) #else - s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); - s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; - s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; - s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); +#define PERM() do { \ + s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); \ + s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; \ + s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; \ + s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); \ +} while (0) #endif - s[0] ^= v[i]; - } + + s[3] ^= v1; + PERM(); + s[0] ^= v1; + s[3] ^= v2; + PERM(); + s[0] ^= v2; } #ifdef CONFIG_SMP @@ -1119,10 +1124,8 @@ void add_interrupt_randomness(int irq) struct pt_regs *regs = get_irq_regs(); unsigned int new_count; - fast_mix(fast_pool->pool, (unsigned long[2]){ - entropy, - (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq) - }); + fast_mix(fast_pool->pool, entropy, + (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) @@ -1162,8 +1165,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu * sometime after, so mix into the fast pool. */ if (in_hardirq()) { - fast_mix(this_cpu_ptr(&irq_randomness)->pool, - (unsigned long[2]){ entropy, num }); + fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); } else { spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); -- GitLab From f4cb809a90df1daeaab4e65ae2064918575b4e6d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 7 May 2022 14:03:46 +0200 Subject: [PATCH 0260/4335] siphash: use one source of truth for siphash permutations commit e73aaae2fa9024832e1f42e30c787c7baf61d014 upstream. The SipHash family of permutations is currently used in three places: - siphash.c itself, used in the ordinary way it was intended. - random32.c, in a construction from an anonymous contributor. - random.c, as part of its fast_mix function. Each one of these places reinvents the wheel with the same C code, same rotation constants, and same symmetry-breaking constants. This commit tidies things up a bit by placing macros for the permutations and constants into siphash.h, where each of the three .c users can access them. It also leaves a note dissuading more users of them from emerging. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 30 +++++++----------------------- include/linux/prandom.h | 23 +++++++---------------- include/linux/siphash.h | 28 ++++++++++++++++++++++++++++ lib/siphash.c | 32 ++++++++++---------------------- 4 files changed, 52 insertions(+), 61 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 607aa0055c665..ce16bb7dfbf2b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -1016,12 +1017,11 @@ struct fast_pool { static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { #ifdef CONFIG_64BIT - /* SipHash constants */ - .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL, - 0x6c7967656e657261UL, 0x7465646279746573UL } +#define FASTMIX_PERM SIPHASH_PERMUTATION + .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 } #else - /* HalfSipHash constants */ - .pool = { 0, 0, 0x6c796765U, 0x74656462U } +#define FASTMIX_PERM HSIPHASH_PERMUTATION + .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 } #endif }; @@ -1033,27 +1033,11 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { */ static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { -#ifdef CONFIG_64BIT -#define PERM() do { \ - s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); \ - s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; \ - s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; \ - s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); \ -} while (0) -#else -#define PERM() do { \ - s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); \ - s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; \ - s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; \ - s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); \ -} while (0) -#endif - s[3] ^= v1; - PERM(); + FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v1; s[3] ^= v2; - PERM(); + FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v2; } diff --git a/include/linux/prandom.h b/include/linux/prandom.h index 056d31317e499..a4aadd2dc153e 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include #include +#include u32 prandom_u32(void); void prandom_bytes(void *buf, size_t nbytes); @@ -27,15 +28,10 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ - v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ - v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ - v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ -) +#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) -#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) -#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) +#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) +#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) #elif BITS_PER_LONG == 32 /* @@ -43,14 +39,9 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ - v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ - v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ - v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ -) -#define PRND_K0 0x6c796765 -#define PRND_K1 0x74656462 +#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) +#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) +#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) #else #error Unsupported BITS_PER_LONG diff --git a/include/linux/siphash.h b/include/linux/siphash.h index 0cda61855d907..0bb5ecd507bef 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -136,4 +136,32 @@ static inline u32 hsiphash(const void *data, size_t len, return ___hsiphash_aligned(data, len, key); } +/* + * These macros expose the raw SipHash and HalfSipHash permutations. + * Do not use them directly! If you think you have a use for them, + * be sure to CC the maintainer of this file explaining why. + */ + +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +#define HSIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ + (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ + (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ + (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) + +#define HSIPHASH_CONST_0 0U +#define HSIPHASH_CONST_1 0U +#define HSIPHASH_CONST_2 0x6c796765U +#define HSIPHASH_CONST_3 0x74656462U + #endif /* _LINUX_SIPHASH_H */ diff --git a/lib/siphash.c b/lib/siphash.c index 72b9068ab57bf..71d315a6ad623 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -18,19 +18,13 @@ #include #endif -#define SIPROUND \ - do { \ - v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ - v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ - v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ - v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ - } while (0) +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PREAMBLE(len) \ - u64 v0 = 0x736f6d6570736575ULL; \ - u64 v1 = 0x646f72616e646f6dULL; \ - u64 v2 = 0x6c7967656e657261ULL; \ - u64 v3 = 0x7465646279746573ULL; \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ u64 b = ((u64)(len)) << 56; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ @@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, } EXPORT_SYMBOL(hsiphash_4u32); #else -#define HSIPROUND \ - do { \ - v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ - v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ - v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ - v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ - } while (0) +#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define HPREAMBLE(len) \ - u32 v0 = 0; \ - u32 v1 = 0; \ - u32 v2 = 0x6c796765U; \ - u32 v3 = 0x74656462U; \ + u32 v0 = HSIPHASH_CONST_0; \ + u32 v1 = HSIPHASH_CONST_1; \ + u32 v2 = HSIPHASH_CONST_2; \ + u32 v3 = HSIPHASH_CONST_3; \ u32 b = ((u32)(len)) << 24; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ -- GitLab From 23a1b984f4b878faa1be22f5fcbb974f3ddd4018 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 8 May 2022 13:20:30 +0200 Subject: [PATCH 0261/4335] random: use symbolic constants for crng_init states commit e3d2c5e79a999aa4e7d6f0127e16d3da5a4ff70d upstream. crng_init represents a state machine, with three states, and various rules for transitions. For the longest time, we've been managing these with "0", "1", and "2", and expecting people to figure it out. To make the code more obvious, replace these with proper enum values representing the transition, and then redocument what each of these states mean. Reviewed-by: Dominik Brodowski Cc: Joe Perches Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ce16bb7dfbf2b..58eb1b37a79cc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -70,16 +70,16 @@ *********************************************************************/ /* - * crng_init = 0 --> Uninitialized - * 1 --> Initialized - * 2 --> Initialized from input_pool - * * crng_init is protected by base_crng->lock, and only increases - * its value (from 0->1->2). + * its value (from empty->early->ready). */ -static int crng_init = 0; -#define crng_ready() (likely(crng_init > 1)) -/* Various types of waiters for crng_init->2 transition. */ +static enum { + CRNG_EMPTY = 0, /* Little to no entropy collected */ + CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ + CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ +} crng_init = CRNG_EMPTY; +#define crng_ready() (likely(crng_init >= CRNG_READY)) +/* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_chain_lock); @@ -284,7 +284,7 @@ static void crng_reseed(void) WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); if (!crng_ready()) { - crng_init = 2; + crng_init = CRNG_READY; finalize_init = true; } spin_unlock_irqrestore(&base_crng.lock, flags); @@ -378,7 +378,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, extracting - * when crng_init==0. + * when crng_init is CRNG_EMPTY. */ if (!crng_ready()) { bool ready; @@ -386,7 +386,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); if (!ready) { - if (crng_init == 0) + if (crng_init == CRNG_EMPTY) extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); @@ -740,8 +740,8 @@ EXPORT_SYMBOL(get_random_bytes_arch); enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_INIT_BITS = POOL_BITS, /* No point in settling for less. */ - POOL_FAST_INIT_BITS = POOL_INIT_BITS / 2 + POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ + POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ }; static struct { @@ -836,13 +836,13 @@ static void credit_init_bits(size_t nbits) init_bits = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.init_bits, orig, init_bits) != orig); - if (!crng_ready() && init_bits >= POOL_INIT_BITS) + if (!crng_ready() && init_bits >= POOL_READY_BITS) crng_reseed(); - else if (unlikely(crng_init == 0 && init_bits >= POOL_FAST_INIT_BITS)) { + else if (unlikely(crng_init == CRNG_EMPTY && init_bits >= POOL_EARLY_BITS)) { spin_lock_irqsave(&base_crng.lock, flags); - if (crng_init == 0) { + if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); - crng_init = 1; + crng_init = CRNG_EARLY; } spin_unlock_irqrestore(&base_crng.lock, flags); } @@ -1517,7 +1517,7 @@ const struct file_operations urandom_fops = { * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting - * more entropy, tied to the POOL_INIT_BITS constant. It is writable + * more entropy, tied to the POOL_READY_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * @@ -1532,7 +1532,7 @@ const struct file_operations urandom_fops = { #include static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; -static int sysctl_random_write_wakeup_bits = POOL_INIT_BITS; +static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; -- GitLab From c4e600154ac07d7b65c73611b4e408bb449f1b44 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 9 May 2022 13:40:55 +0200 Subject: [PATCH 0262/4335] random: avoid initializing twice in credit race commit fed7ef061686cc813b1f3d8d0edc6c35b4d3537b upstream. Since all changes of crng_init now go through credit_init_bits(), we can fix a long standing race in which two concurrent callers of credit_init_bits() have the new bit count >= some threshold, but are doing so with crng_init as a lower threshold, checked outside of a lock, resulting in crng_reseed() or similar being called twice. In order to fix this, we can use the original cmpxchg value of the bit count, and only change crng_init when the bit count transitions from below a threshold to meeting the threshold. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 58eb1b37a79cc..32c7863930e2d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -823,7 +823,7 @@ static void extract_entropy(void *buf, size_t nbytes) static void credit_init_bits(size_t nbits) { - unsigned int init_bits, orig, add; + unsigned int new, orig, add; unsigned long flags; if (crng_ready() || !nbits) @@ -833,12 +833,12 @@ static void credit_init_bits(size_t nbits) do { orig = READ_ONCE(input_pool.init_bits); - init_bits = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.init_bits, orig, init_bits) != orig); + new = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.init_bits, orig, new) != orig); - if (!crng_ready() && init_bits >= POOL_READY_BITS) + if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) crng_reseed(); - else if (unlikely(crng_init == CRNG_EMPTY && init_bits >= POOL_EARLY_BITS)) { + else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); -- GitLab From 272b79432f66de60efc9735b85cd922ce2824923 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 9 May 2022 13:53:24 +0200 Subject: [PATCH 0263/4335] random: move initialization out of reseeding hot path commit 68c9c8b192c6dae9be6278e98ee44029d5da2d31 upstream. Initialization happens once -- by way of credit_init_bits() -- and then it never happens again. Therefore, it doesn't need to be in crng_reseed(), which is a hot path that is called multiple times. It also doesn't make sense to have there, as initialization activity is better associated with initialization routines. After the prior commit, crng_reseed() now won't be called by multiple concurrent callers, which means that we can safely move the "finialize_init" logic into crng_init_bits() unconditionally. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 32c7863930e2d..6f9d498a1916b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -266,7 +266,6 @@ static void crng_reseed(void) unsigned long flags; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; - bool finalize_init = false; extract_entropy(key, sizeof(key)); @@ -283,28 +282,10 @@ static void crng_reseed(void) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - if (!crng_ready()) { + if (!crng_ready()) crng_init = CRNG_READY; - finalize_init = true; - } spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); - if (finalize_init) { - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } - } } /* @@ -836,10 +817,25 @@ static void credit_init_bits(size_t nbits) new = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.init_bits, orig, new) != orig); - if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) - crng_reseed(); - else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { + if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { + crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + if (urandom_warning.missed) { + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } + } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); + /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_init = CRNG_EARLY; -- GitLab From 8df752b82ec59b7d23c3d56b0d9bf0fdd140ce80 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 9 May 2022 16:13:18 +0200 Subject: [PATCH 0264/4335] random: remove ratelimiting for in-kernel unseeded randomness commit cc1e127bfa95b5fb2f9307e7168bf8b2b45b4c5e upstream. The CONFIG_WARN_ALL_UNSEEDED_RANDOM debug option controls whether the kernel warns about all unseeded randomness or just the first instance. There's some complicated rate limiting and comparison to the previous caller, such that even with CONFIG_WARN_ALL_UNSEEDED_RANDOM enabled, developers still don't see all the messages or even an accurate count of how many were missed. This is the result of basically parallel mechanisms aimed at accomplishing more or less the same thing, added at different points in random.c history, which sort of compete with the first-instance-only limiting we have now. It turns out, however, that nobody cares about the first unseeded randomness instance of in-kernel users. The same first user has been there for ages now, and nobody is doing anything about it. It isn't even clear that anybody _can_ do anything about it. Most places that can do something about it have switched over to using get_random_bytes_wait() or wait_for_random_bytes(), which is the right thing to do, but there is still much code that needs randomness sometimes during init, and as a geeneral rule, if you're not using one of the _wait functions or the readiness notifier callback, you're bound to be doing it wrong just based on that fact alone. So warning about this same first user that can't easily change is simply not an effective mechanism for anything at all. Users can't do anything about it, as the Kconfig text points out -- the problem isn't in userspace code -- and kernel developers don't or more often can't react to it. Instead, show the warning for all instances when CONFIG_WARN_ALL_UNSEEDED_RANDOM is set, so that developers can debug things need be, or if it isn't set, don't show a warning at all. At the same time, CONFIG_WARN_ALL_UNSEEDED_RANDOM now implies setting random.ratelimit_disable=1 on by default, since if you care about one you probably care about the other too. And we can clean up usage around the related urandom_warning ratelimiter as well (whose behavior isn't changing), so that it properly counts missed messages after the 10 message threshold is reached. Cc: Theodore Ts'o Cc: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 61 +++++++++++++------------------------------ lib/Kconfig.debug | 3 +-- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6f9d498a1916b..c40b2d96dc4c5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -86,11 +86,10 @@ static DEFINE_SPINLOCK(random_ready_chain_lock); static RAW_NOTIFIER_HEAD(random_ready_chain); /* Control how we warn userspace. */ -static struct ratelimit_state unseeded_warning = - RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); -static int ratelimit_disable __read_mostly; +static int ratelimit_disable __read_mostly = + IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); @@ -183,27 +182,15 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_chain_lock, flags); } -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) +#define warn_unseeded_randomness() \ + _warn_unseeded_randomness(__func__, (void *)_RET_IP_) -static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) +static void _warn_unseeded_randomness(const char *func_name, void *caller) { -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; -#endif - - if (print_once || crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) + if (!IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) || crng_ready()) return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } @@ -456,9 +443,7 @@ static void _get_random_bytes(void *buf, size_t nbytes) */ void get_random_bytes(void *buf, size_t nbytes) { - static void *previous; - - warn_unseeded_randomness(&previous); + warn_unseeded_randomness(); _get_random_bytes(buf, nbytes); } EXPORT_SYMBOL(get_random_bytes); @@ -554,10 +539,9 @@ u64 get_random_u64(void) u64 ret; unsigned long flags; struct batched_entropy *batch; - static void *previous; unsigned long next_gen; - warn_unseeded_randomness(&previous); + warn_unseeded_randomness(); if (!crng_ready()) { _get_random_bytes(&ret, sizeof(ret)); @@ -593,10 +577,9 @@ u32 get_random_u32(void) u32 ret; unsigned long flags; struct batched_entropy *batch; - static void *previous; unsigned long next_gen; - warn_unseeded_randomness(&previous); + warn_unseeded_randomness(); if (!crng_ready()) { _get_random_bytes(&ret, sizeof(ret)); @@ -823,16 +806,9 @@ static void credit_init_bits(size_t nbits) wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { + if (urandom_warning.missed) pr_notice("%d urandom warning(s) missed due to ratelimiting\n", urandom_warning.missed); - urandom_warning.missed = 0; - } } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ @@ -945,10 +921,6 @@ int __init rand_initialize(void) else if (arch_init && trust_cpu) credit_init_bits(BLAKE2S_BLOCK_SIZE * 8); - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } return 0; } @@ -1394,11 +1366,14 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, { static int maxwarn = 10; - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) + if (!crng_ready()) { + if (!ratelimit_disable && maxwarn <= 0) + ++urandom_warning.missed; + else if (ratelimit_disable || __ratelimit(&urandom_warning)) { + --maxwarn; pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", current->comm, nbytes); + } } return get_random_bytes_user(buf, nbytes); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 55e89b237b6f8..7fd3fa05379e2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1559,8 +1559,7 @@ config WARN_ALL_UNSEEDED_RANDOM so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. However, since users cannot do anything actionable to - address this, by default the kernel will issue only a single - warning for the first use of unseeded randomness. + address this, by default this option is disabled. Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for -- GitLab From ac0172992c9427b84d17d9f89dbbeb38e4005eaa Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 10 May 2022 15:20:42 +0200 Subject: [PATCH 0265/4335] random: use proper jiffies comparison macro commit 8a5b8a4a4ceb353b4dd5bafd09e2b15751bcdb51 upstream. This expands to exactly the same code that it replaces, but makes things consistent by using the same macro for jiffy comparisons throughout. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c40b2d96dc4c5..328e8474017fb 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -326,7 +326,7 @@ static bool crng_has_old_seed(void) interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } - return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); + return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval); } /* -- GitLab From 11cce5040c29485f0891e4c2278c5a9d3e65a80e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 5 May 2022 02:20:22 +0200 Subject: [PATCH 0266/4335] random: handle latent entropy and command line from random_init() commit 2f14062bb14b0fcfcc21e6dc7d5b5c0d25966164 upstream. Currently, start_kernel() adds latent entropy and the command line to the entropy bool *after* the RNG has been initialized, deferring when it's actually used by things like stack canaries until the next time the pool is seeded. This surely is not intended. Rather than splitting up which entropy gets added where and when between start_kernel() and random_init(), just do everything in random_init(), which should eliminate these kinds of bugs in the future. While we're at it, rename the awkwardly titled "rand_initialize()" to the more standard "random_init()" nomenclature. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 13 ++++++++----- include/linux/random.h | 16 +++++++--------- init/main.c | 10 +++------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 328e8474017fb..b638d09c2a037 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -888,12 +888,13 @@ early_param("random.trust_bootloader", parse_trust_bootloader); /* * The first collection of entropy occurs at system boot while interrupts - * are still turned off. Here we push in RDSEED, a timestamp, and utsname(). - * Depending on the above configuration knob, RDSEED may be considered - * sufficient for initialization. Note that much earlier setup may already - * have pushed entropy into the input pool by the time we get here. + * are still turned off. Here we push in latent entropy, RDSEED, a timestamp, + * utsname(), and the command line. Depending on the above configuration knob, + * RDSEED may be considered sufficient for initialization. Note that much + * earlier setup may already have pushed entropy into the input pool by the + * time we get here. */ -int __init rand_initialize(void) +int __init random_init(const char *command_line) { size_t i; ktime_t now = ktime_get_real(); @@ -915,6 +916,8 @@ int __init rand_initialize(void) } _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(command_line, strlen(command_line)); + add_latent_entropy(); if (crng_ready()) crng_reseed(); diff --git a/include/linux/random.h b/include/linux/random.h index d4abda9e23487..588e31dc10af3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -14,26 +14,24 @@ struct notifier_block; extern void add_device_randomness(const void *, size_t); extern void add_bootloader_randomness(const void *, size_t); +extern void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) __latent_entropy; +extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) { - add_device_randomness((const void *)&latent_entropy, - sizeof(latent_entropy)); + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); } #else static inline void add_latent_entropy(void) {} #endif -extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq) __latent_entropy; -extern void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy); - extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -extern int __init rand_initialize(void); +extern int __init random_init(const char *command_line); extern bool rng_is_initialized(void); extern int register_random_ready_notifier(struct notifier_block *nb); extern int unregister_random_ready_notifier(struct notifier_block *nb); diff --git a/init/main.c b/init/main.c index 2b9ba7ea42eca..cf79b5a766cb1 100644 --- a/init/main.c +++ b/init/main.c @@ -1046,15 +1046,11 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) /* * For best initial stack canary entropy, prepare it after: * - setup_arch() for any UEFI RNG entropy and boot cmdline access - * - timekeeping_init() for ktime entropy used in rand_initialize() + * - timekeeping_init() for ktime entropy used in random_init() * - time_init() for making random_get_entropy() work on some platforms - * - rand_initialize() to get any arch-specific entropy like RDRAND - * - add_latent_entropy() to get any latent entropy - * - adding command line entropy + * - random_init() to initialize the RNG from from early entropy sources */ - rand_initialize(); - add_latent_entropy(); - add_device_randomness(command_line, strlen(command_line)); + random_init(command_line); boot_init_stack_canary(); perf_event_init(); -- GitLab From 4e5e6754a4b6b7c6af8b2c87ed62ec0c5a5eb313 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 12 May 2022 15:32:26 +0200 Subject: [PATCH 0267/4335] random: credit architectural init the exact amount commit 12e45a2a6308105469968951e6d563e8f4fea187 upstream. RDRAND and RDSEED can fail sometimes, which is fine. We currently initialize the RNG with 512 bits of RDRAND/RDSEED. We only need 256 bits of those to succeed in order to initialize the RNG. Instead of the current "all or nothing" approach, actually credit these contributions the amount that is actually contributed. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b638d09c2a037..472a846de032b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -896,9 +896,8 @@ early_param("random.trust_bootloader", parse_trust_bootloader); */ int __init random_init(const char *command_line) { - size_t i; ktime_t now = ktime_get_real(); - bool arch_init = true; + unsigned int i, arch_bytes; unsigned long rv; #if defined(LATENT_ENTROPY_PLUGIN) @@ -906,11 +905,12 @@ int __init random_init(const char *command_line) _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif - for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { + for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE; + i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { rv = random_get_entropy(); - arch_init = false; + arch_bytes -= sizeof(rv); } _mix_pool_bytes(&rv, sizeof(rv)); } @@ -921,8 +921,8 @@ int __init random_init(const char *command_line) if (crng_ready()) crng_reseed(); - else if (arch_init && trust_cpu) - credit_init_bits(BLAKE2S_BLOCK_SIZE * 8); + else if (trust_cpu) + credit_init_bits(arch_bytes * 8); return 0; } -- GitLab From c6ae9d65bcdbb078f1c0befdcdd1d53805e44fe6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 3 May 2022 15:30:45 +0200 Subject: [PATCH 0268/4335] random: use static branch for crng_ready() commit f5bda35fba615ace70a656d4700423fa6c9bebee upstream. Since crng_ready() is only false briefly during initialization and then forever after becomes true, we don't need to evaluate it after, making it a prime candidate for a static branch. One complication, however, is that it changes state in a particular call to credit_init_bits(), which might be made from atomic context, which means we must kick off a workqueue to change the static key. Further complicating things, credit_init_bits() may be called sufficiently early on in system initialization such that system_wq is NULL. Fortunately, there exists the nice function execute_in_process_context(), which will immediately execute the function if !in_interrupt(), and otherwise defer it to a workqueue. During early init, before workqueues are available, in_interrupt() is always false, because interrupts haven't even been enabled yet, which means the function in that case executes immediately. Later on, after workqueues are available, in_interrupt() might be true, but in that case, the work is queued in system_wq and all goes well. Cc: Theodore Ts'o Cc: Sultan Alsawaf Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 472a846de032b..9611c9069985f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -77,8 +77,9 @@ static enum { CRNG_EMPTY = 0, /* Little to no entropy collected */ CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ -} crng_init = CRNG_EMPTY; -#define crng_ready() (likely(crng_init >= CRNG_READY)) +} crng_init __read_mostly = CRNG_EMPTY; +static DEFINE_STATIC_KEY_FALSE(crng_is_ready); +#define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY) /* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; @@ -108,6 +109,11 @@ bool rng_is_initialized(void) } EXPORT_SYMBOL(rng_is_initialized); +static void crng_set_ready(struct work_struct *work) +{ + static_branch_enable(&crng_is_ready); +} + /* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ static void try_to_generate_entropy(void); @@ -269,7 +275,7 @@ static void crng_reseed(void) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - if (!crng_ready()) + if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); @@ -787,6 +793,7 @@ static void extract_entropy(void *buf, size_t nbytes) static void credit_init_bits(size_t nbits) { + static struct execute_work set_ready; unsigned int new, orig, add; unsigned long flags; @@ -802,6 +809,7 @@ static void credit_init_bits(size_t nbits) if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ + execute_in_process_context(crng_set_ready, &set_ready); process_random_ready_list(); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); @@ -1311,7 +1319,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if (count > INT_MAX) count = INT_MAX; - if (!(flags & GRND_INSECURE) && !crng_ready()) { + if (!crng_ready() && !(flags & GRND_INSECURE)) { int ret; if (flags & GRND_NONBLOCK) -- GitLab From 55729575eaa990f5881891b5a2b9c0fec0ec24ea Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 13 May 2022 12:29:38 +0200 Subject: [PATCH 0269/4335] random: remove extern from functions in header commit 7782cfeca7d420e8bb707613d4cfb0f7ff29bb3a upstream. Accoriding to the kernel style guide, having `extern` on functions in headers is old school and deprecated, and doesn't add anything. So remove them from random.h, and tidy up the file a little bit too. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- include/linux/random.h | 71 +++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/include/linux/random.h b/include/linux/random.h index 588e31dc10af3..0c140a0847a2e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -12,13 +12,12 @@ struct notifier_block; -extern void add_device_randomness(const void *, size_t); -extern void add_bootloader_randomness(const void *, size_t); -extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq) __latent_entropy; -extern void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy); +void add_device_randomness(const void *, size_t); +void add_bootloader_randomness(const void *, size_t); +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) __latent_entropy; +void add_interrupt_randomness(int irq) __latent_entropy; +void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -26,21 +25,11 @@ static inline void add_latent_entropy(void) add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); } #else -static inline void add_latent_entropy(void) {} -#endif - -extern void get_random_bytes(void *buf, size_t nbytes); -extern int wait_for_random_bytes(void); -extern int __init random_init(const char *command_line); -extern bool rng_is_initialized(void); -extern int register_random_ready_notifier(struct notifier_block *nb); -extern int unregister_random_ready_notifier(struct notifier_block *nb); -extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); - -#ifndef MODULE -extern const struct file_operations random_fops, urandom_fops; +static inline void add_latent_entropy(void) { } #endif +void get_random_bytes(void *buf, size_t nbytes); +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned int get_random_int(void) @@ -72,11 +61,17 @@ static inline unsigned long get_random_long(void) static inline unsigned long get_random_canary(void) { - unsigned long val = get_random_long(); - - return val & CANARY_MASK; + return get_random_long() & CANARY_MASK; } +unsigned long randomize_page(unsigned long start, unsigned long range); + +int __init random_init(const char *command_line); +bool rng_is_initialized(void); +int wait_for_random_bytes(void); +int register_random_ready_notifier(struct notifier_block *nb); +int unregister_random_ready_notifier(struct notifier_block *nb); + /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ static inline int get_random_bytes_wait(void *buf, size_t nbytes) @@ -100,8 +95,6 @@ declare_get_random_var_wait(int) declare_get_random_var_wait(long) #undef declare_get_random_var -unsigned long randomize_page(unsigned long start, unsigned long range); - /* * This is designed to be standalone for just prandom * users, but for now we include it from @@ -112,22 +105,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range); #ifdef CONFIG_ARCH_RANDOM # include #else -static inline bool __must_check arch_get_random_long(unsigned long *v) -{ - return false; -} -static inline bool __must_check arch_get_random_int(unsigned int *v) -{ - return false; -} -static inline bool __must_check arch_get_random_seed_long(unsigned long *v) -{ - return false; -} -static inline bool __must_check arch_get_random_seed_int(unsigned int *v) -{ - return false; -} +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } #endif /* @@ -151,8 +132,12 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) #endif #ifdef CONFIG_SMP -extern int random_prepare_cpu(unsigned int cpu); -extern int random_online_cpu(unsigned int cpu); +int random_prepare_cpu(unsigned int cpu); +int random_online_cpu(unsigned int cpu); +#endif + +#ifndef MODULE +extern const struct file_operations random_fops, urandom_fops; #endif #endif /* _LINUX_RANDOM_H */ -- GitLab From 29ed26a33436bb4bbfff5189a4cf3e34863d77dd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 13 May 2022 12:32:23 +0200 Subject: [PATCH 0270/4335] random: use proper return types on get_random_{int,long}_wait() commit 7c3a8a1db5e03d02cc0abb3357a84b8b326dfac3 upstream. Before these were returning signed values, but the API is intended to be used with unsigned values. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- include/linux/random.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/random.h b/include/linux/random.h index 0c140a0847a2e..0416b393ca4d3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -81,18 +81,18 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes) return ret; } -#define declare_get_random_var_wait(var) \ - static inline int get_random_ ## var ## _wait(var *out) { \ +#define declare_get_random_var_wait(name, ret_type) \ + static inline int get_random_ ## name ## _wait(ret_type *out) { \ int ret = wait_for_random_bytes(); \ if (unlikely(ret)) \ return ret; \ - *out = get_random_ ## var(); \ + *out = get_random_ ## name(); \ return 0; \ } -declare_get_random_var_wait(u32) -declare_get_random_var_wait(u64) -declare_get_random_var_wait(int) -declare_get_random_var_wait(long) +declare_get_random_var_wait(u32, u32) +declare_get_random_var_wait(u64, u32) +declare_get_random_var_wait(int, unsigned int) +declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var /* -- GitLab From 542a60612d2ab0146eec2ed5a0e971ccf7ae658b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 13 May 2022 13:18:46 +0200 Subject: [PATCH 0271/4335] random: make consistent use of buf and len commit a19402634c435a4eae226df53c141cdbb9922e7b upstream. The current code was a mix of "nbytes", "count", "size", "buffer", "in", and so forth. Instead, let's clean this up by naming input parameters "buf" (or "ubuf") and "len", so that you always understand that you're reading this variety of function argument. Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 193 ++++++++++++++++++++--------------------- include/linux/random.h | 10 +-- 2 files changed, 99 insertions(+), 104 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9611c9069985f..0719c8d8a5714 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -210,7 +210,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller) * * There are a few exported interfaces for use by other drivers: * - * void get_random_bytes(void *buf, size_t nbytes) + * void get_random_bytes(void *buf, size_t len) * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() @@ -251,7 +251,7 @@ static DEFINE_PER_CPU(struct crng, crngs) = { }; /* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ -static void extract_entropy(void *buf, size_t nbytes); +static void extract_entropy(void *buf, size_t len); /* This extracts a new crng key from the input pool. */ static void crng_reseed(void) @@ -405,24 +405,24 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], local_unlock_irqrestore(&crngs.lock, flags); } -static void _get_random_bytes(void *buf, size_t nbytes) +static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; - size_t len; + size_t first_block_len; - if (!nbytes) + if (!len) return; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, buf, len); - nbytes -= len; - buf += len; + first_block_len = min_t(size_t, 32, len); + crng_make_state(chacha_state, buf, first_block_len); + len -= first_block_len; + buf += first_block_len; - while (nbytes) { - if (nbytes < CHACHA_BLOCK_SIZE) { + while (len) { + if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp); - memcpy(buf, tmp, nbytes); + memcpy(buf, tmp, len); memzero_explicit(tmp, sizeof(tmp)); break; } @@ -430,7 +430,7 @@ static void _get_random_bytes(void *buf, size_t nbytes) chacha20_block(chacha_state, buf); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - nbytes -= CHACHA_BLOCK_SIZE; + len -= CHACHA_BLOCK_SIZE; buf += CHACHA_BLOCK_SIZE; } @@ -447,20 +447,20 @@ static void _get_random_bytes(void *buf, size_t nbytes) * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ -void get_random_bytes(void *buf, size_t nbytes) +void get_random_bytes(void *buf, size_t len) { warn_unseeded_randomness(); - _get_random_bytes(buf, nbytes); + _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); -static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +static ssize_t get_random_bytes_user(void __user *ubuf, size_t len) { - size_t len, left, ret = 0; + size_t block_len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; - if (!nbytes) + if (!len) return 0; /* @@ -474,8 +474,8 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * use chacha_state after, so we can simply return those bytes to * the user directly. */ - if (nbytes <= CHACHA_KEY_SIZE) { - ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes); + if (len <= CHACHA_KEY_SIZE) { + ret = len - copy_to_user(ubuf, &chacha_state[4], len); goto out_zero_chacha; } @@ -484,17 +484,17 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - left = copy_to_user(buf, output, len); + block_len = min_t(size_t, len, CHACHA_BLOCK_SIZE); + left = copy_to_user(ubuf, output, block_len); if (left) { - ret += len - left; + ret += block_len - left; break; } - buf += len; - ret += len; - nbytes -= len; - if (!nbytes) + ubuf += block_len; + ret += block_len; + len -= block_len; + if (!len) break; BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); @@ -668,24 +668,24 @@ unsigned long randomize_page(unsigned long start, unsigned long range) * use. Use get_random_bytes() instead. It returns the number of * bytes filled in. */ -size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +size_t __must_check get_random_bytes_arch(void *buf, size_t len) { - size_t left = nbytes; + size_t left = len; u8 *p = buf; while (left) { unsigned long v; - size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + size_t block_len = min_t(size_t, left, sizeof(unsigned long)); if (!arch_get_random_long(&v)) break; - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; + memcpy(p, &v, block_len); + p += block_len; + left -= block_len; } - return nbytes - left; + return len - left; } EXPORT_SYMBOL(get_random_bytes_arch); @@ -696,15 +696,15 @@ EXPORT_SYMBOL(get_random_bytes_arch); * * Callers may add entropy via: * - * static void mix_pool_bytes(const void *in, size_t nbytes) + * static void mix_pool_bytes(const void *buf, size_t len) * * After which, if added entropy should be credited: * - * static void credit_init_bits(size_t nbits) + * static void credit_init_bits(size_t bits) * * Finally, extract entropy via: * - * static void extract_entropy(void *buf, size_t nbytes) + * static void extract_entropy(void *buf, size_t len) * **********************************************************************/ @@ -726,9 +726,9 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static void _mix_pool_bytes(const void *in, size_t nbytes) +static void _mix_pool_bytes(const void *buf, size_t len) { - blake2s_update(&input_pool.hash, in, nbytes); + blake2s_update(&input_pool.hash, buf, len); } /* @@ -736,12 +736,12 @@ static void _mix_pool_bytes(const void *in, size_t nbytes) * update the initialization bit counter; the caller should call * credit_init_bits if this is appropriate. */ -static void mix_pool_bytes(const void *in, size_t nbytes) +static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } @@ -749,7 +749,7 @@ static void mix_pool_bytes(const void *in, size_t nbytes) * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ -static void extract_entropy(void *buf, size_t nbytes) +static void extract_entropy(void *buf, size_t len) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; @@ -778,12 +778,12 @@ static void extract_entropy(void *buf, size_t nbytes) spin_unlock_irqrestore(&input_pool.lock, flags); memzero_explicit(next_key, sizeof(next_key)); - while (nbytes) { - i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + while (len) { + i = min_t(size_t, len, BLAKE2S_HASH_SIZE); /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); - nbytes -= i; + len -= i; buf += i; } @@ -791,16 +791,16 @@ static void extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } -static void credit_init_bits(size_t nbits) +static void credit_init_bits(size_t bits) { static struct execute_work set_ready; unsigned int new, orig, add; unsigned long flags; - if (crng_ready() || !nbits) + if (crng_ready() || !bits) return; - add = min_t(size_t, nbits, POOL_BITS); + add = min_t(size_t, bits, POOL_BITS); do { orig = READ_ONCE(input_pool.init_bits); @@ -836,13 +836,11 @@ static void credit_init_bits(size_t nbits) * The following exported functions are used for pushing entropy into * the above entropy accumulation routines: * - * void add_device_randomness(const void *buf, size_t size); - * void add_hwgenerator_randomness(const void *buffer, size_t count, - * size_t entropy); - * void add_bootloader_randomness(const void *buf, size_t size); + * void add_device_randomness(const void *buf, size_t len); + * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t len); * void add_interrupt_randomness(int irq); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); + * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that @@ -906,7 +904,7 @@ int __init random_init(const char *command_line) { ktime_t now = ktime_get_real(); unsigned int i, arch_bytes; - unsigned long rv; + unsigned long entropy; #if defined(LATENT_ENTROPY_PLUGIN) static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; @@ -914,13 +912,13 @@ int __init random_init(const char *command_line) #endif for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE; - i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_bytes -= sizeof(rv); + i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) { + if (!arch_get_random_seed_long_early(&entropy) && + !arch_get_random_long_early(&entropy)) { + entropy = random_get_entropy(); + arch_bytes -= sizeof(entropy); } - _mix_pool_bytes(&rv, sizeof(rv)); + _mix_pool_bytes(&entropy, sizeof(entropy)); } _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(utsname(), sizeof(*(utsname()))); @@ -943,14 +941,14 @@ int __init random_init(const char *command_line) * the entropy pool having similar initial state across largely * identical devices. */ -void add_device_randomness(const void *buf, size_t size) +void add_device_randomness(const void *buf, size_t len) { unsigned long entropy = random_get_entropy(); unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); - _mix_pool_bytes(buf, size); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); @@ -960,10 +958,9 @@ EXPORT_SYMBOL(add_device_randomness); * Those devices may produce endless random bits and will be throttled * when our pool is full. */ -void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy) +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy) { - mix_pool_bytes(buffer, count); + mix_pool_bytes(buf, len); credit_init_bits(entropy); /* @@ -979,11 +976,11 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); * Handle random seed passed by bootloader, and credit it if * CONFIG_RANDOM_TRUST_BOOTLOADER is set. */ -void add_bootloader_randomness(const void *buf, size_t size) +void add_bootloader_randomness(const void *buf, size_t len) { - mix_pool_bytes(buf, size); + mix_pool_bytes(buf, len); if (trust_bootloader) - credit_init_bits(size * 8); + credit_init_bits(len * 8); } EXPORT_SYMBOL_GPL(add_bootloader_randomness); @@ -1183,8 +1180,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu credit_init_bits(bits); } -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) +void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; @@ -1303,8 +1299,7 @@ static void try_to_generate_entropy(void) * **********************************************************************/ -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, - flags) +SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; @@ -1316,8 +1311,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; - if (count > INT_MAX) - count = INT_MAX; + if (len > INT_MAX) + len = INT_MAX; if (!crng_ready() && !(flags & GRND_INSECURE)) { int ret; @@ -1328,7 +1323,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if (unlikely(ret)) return ret; } - return get_random_bytes_user(buf, count); + return get_random_bytes_user(ubuf, len); } static __poll_t random_poll(struct file *file, poll_table *wait) @@ -1337,21 +1332,21 @@ static __poll_t random_poll(struct file *file, poll_table *wait) return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } -static int write_pool(const char __user *ubuf, size_t count) +static int write_pool(const char __user *ubuf, size_t len) { - size_t len; + size_t block_len; int ret = 0; u8 block[BLAKE2S_BLOCK_SIZE]; - while (count) { - len = min(count, sizeof(block)); - if (copy_from_user(block, ubuf, len)) { + while (len) { + block_len = min(len, sizeof(block)); + if (copy_from_user(block, ubuf, block_len)) { ret = -EFAULT; goto out; } - count -= len; - ubuf += len; - mix_pool_bytes(block, len); + len -= block_len; + ubuf += block_len; + mix_pool_bytes(block, block_len); cond_resched(); } @@ -1360,20 +1355,20 @@ out: return ret; } -static ssize_t random_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t random_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *ppos) { int ret; - ret = write_pool(buffer, count); + ret = write_pool(ubuf, len); if (ret) return ret; - return (ssize_t)count; + return (ssize_t)len; } -static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t urandom_read(struct file *file, char __user *ubuf, + size_t len, loff_t *ppos) { static int maxwarn = 10; @@ -1383,22 +1378,22 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, else if (ratelimit_disable || __ratelimit(&urandom_warning)) { --maxwarn; pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); + current->comm, len); } } - return get_random_bytes_user(buf, nbytes); + return get_random_bytes_user(ubuf, len); } -static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t random_read(struct file *file, char __user *ubuf, + size_t len, loff_t *ppos) { int ret; ret = wait_for_random_bytes(); if (ret != 0) return ret; - return get_random_bytes_user(buf, nbytes); + return get_random_bytes_user(ubuf, len); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) @@ -1523,7 +1518,7 @@ static u8 sysctl_bootid[UUID_SIZE]; * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. */ -static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, +static int proc_do_uuid(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { u8 tmp_uuid[UUID_SIZE], *uuid; @@ -1550,14 +1545,14 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, } snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); - return proc_dostring(&fake_table, 0, buffer, lenp, ppos); + return proc_dostring(&fake_table, 0, buf, lenp, ppos); } /* The same as proc_dointvec, but writes don't change anything. */ -static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer, +static int proc_do_rointvec(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { - return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos); + return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } extern struct ctl_table random_table[]; diff --git a/include/linux/random.h b/include/linux/random.h index 0416b393ca4d3..d6ef6cf4346aa 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -12,12 +12,12 @@ struct notifier_block; -void add_device_randomness(const void *, size_t); -void add_bootloader_randomness(const void *, size_t); +void add_device_randomness(const void *buf, size_t len); +void add_bootloader_randomness(const void *buf, size_t len); void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; void add_interrupt_randomness(int irq) __latent_entropy; -void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -28,8 +28,8 @@ static inline void add_latent_entropy(void) static inline void add_latent_entropy(void) { } #endif -void get_random_bytes(void *buf, size_t nbytes); -size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); +void get_random_bytes(void *buf, size_t len); +size_t __must_check get_random_bytes_arch(void *buf, size_t len); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned int get_random_int(void) -- GitLab From 55a368c3e850ec370025158be63806a31b189a08 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 13 May 2022 16:17:12 +0200 Subject: [PATCH 0272/4335] random: move initialization functions out of hot pages commit 560181c27b582557d633ecb608110075433383af upstream. Much of random.c is devoted to initializing the rng and accounting for when a sufficient amount of entropy has been added. In a perfect world, this would all happen during init, and so we could mark these functions as __init. But in reality, this isn't the case: sometimes the rng only finishes initializing some seconds after system init is finished. For this reason, at the moment, a whole host of functions that are only used relatively close to system init and then never again are intermixed with functions that are used in hot code all the time. This creates more cache misses than necessary. In order to pack the hot code closer together, this commit moves the initialization functions that can't be marked as __init into .text.unlikely by way of the __cold attribute. Of particular note is moving credit_init_bits() into a macro wrapper that inlines the crng_ready() static branch check. This avoids a function call to a nop+ret, and most notably prevents extra entropy arithmetic from being computed in mix_interrupt_randomness(). Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0719c8d8a5714..772f4536fc6ae 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -109,7 +109,7 @@ bool rng_is_initialized(void) } EXPORT_SYMBOL(rng_is_initialized); -static void crng_set_ready(struct work_struct *work) +static void __cold crng_set_ready(struct work_struct *work) { static_branch_enable(&crng_is_ready); } @@ -148,7 +148,7 @@ EXPORT_SYMBOL(wait_for_random_bytes); * returns: 0 if callback is successfully added * -EALREADY if pool is already initialised (callback not called) */ -int register_random_ready_notifier(struct notifier_block *nb) +int __cold register_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; int ret = -EALREADY; @@ -167,7 +167,7 @@ EXPORT_SYMBOL(register_random_ready_notifier); /* * Delete a previously registered readiness callback function. */ -int unregister_random_ready_notifier(struct notifier_block *nb) +int __cold unregister_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; int ret; @@ -179,7 +179,7 @@ int unregister_random_ready_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_random_ready_notifier); -static void process_random_ready_list(void) +static void __cold process_random_ready_list(void) { unsigned long flags; @@ -189,15 +189,9 @@ static void process_random_ready_list(void) } #define warn_unseeded_randomness() \ - _warn_unseeded_randomness(__func__, (void *)_RET_IP_) - -static void _warn_unseeded_randomness(const char *func_name, void *caller) -{ - if (!IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) || crng_ready()) - return; - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); -} + if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ + __func__, (void *)_RET_IP_, crng_init) /********************************************************************* @@ -616,7 +610,7 @@ EXPORT_SYMBOL(get_random_u32); * This function is called when the CPU is coming up, with entry * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. */ -int random_prepare_cpu(unsigned int cpu) +int __cold random_prepare_cpu(unsigned int cpu) { /* * When the cpu comes back online, immediately invalidate both @@ -791,13 +785,15 @@ static void extract_entropy(void *buf, size_t len) memzero_explicit(&block, sizeof(block)); } -static void credit_init_bits(size_t bits) +#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) + +static void __cold _credit_init_bits(size_t bits) { static struct execute_work set_ready; unsigned int new, orig, add; unsigned long flags; - if (crng_ready() || !bits) + if (!bits) return; add = min_t(size_t, bits, POOL_BITS); @@ -976,7 +972,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); * Handle random seed passed by bootloader, and credit it if * CONFIG_RANDOM_TRUST_BOOTLOADER is set. */ -void add_bootloader_randomness(const void *buf, size_t len) +void __cold add_bootloader_randomness(const void *buf, size_t len) { mix_pool_bytes(buf, len); if (trust_bootloader) @@ -1022,7 +1018,7 @@ static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) * This function is called when the CPU has just come online, with * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. */ -int random_online_cpu(unsigned int cpu) +int __cold random_online_cpu(unsigned int cpu) { /* * During CPU shutdown and before CPU onlining, add_interrupt_ @@ -1177,7 +1173,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu if (in_hardirq()) this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; else - credit_init_bits(bits); + _credit_init_bits(bits); } void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) @@ -1205,7 +1201,7 @@ void add_disk_randomness(struct gendisk *disk) } EXPORT_SYMBOL_GPL(add_disk_randomness); -void rand_initialize_disk(struct gendisk *disk) +void __cold rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; @@ -1234,7 +1230,7 @@ void rand_initialize_disk(struct gendisk *disk) * * So the re-arming always happens in the entropy loop itself. */ -static void entropy_timer(struct timer_list *t) +static void __cold entropy_timer(struct timer_list *t) { credit_init_bits(1); } @@ -1243,7 +1239,7 @@ static void entropy_timer(struct timer_list *t) * If we have an actual cycle counter, see if we can * generate enough entropy with timing noise */ -static void try_to_generate_entropy(void) +static void __cold try_to_generate_entropy(void) { struct { unsigned long entropy; -- GitLab From 64cb7f01ddd289fefbd2a8051c288aaf5d50a9c7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 14 May 2022 13:59:30 +0200 Subject: [PATCH 0273/4335] random: move randomize_page() into mm where it belongs commit 5ad7dd882e45d7fe432c32e896e2aaa0b21746ea upstream. randomize_page is an mm function. It is documented like one. It contains the history of one. It has the naming convention of one. It looks just like another very similar function in mm, randomize_stack_top(). And it has always been maintained and updated by mm people. There is no need for it to be in random.c. In the "which shape does not look like the other ones" test, pointing to randomize_page() is correct. So move randomize_page() into mm/util.c, right next to the similar randomize_stack_top() function. This commit contains no actual code changes. Cc: Andrew Morton Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 32 -------------------------------- include/linux/mm.h | 1 + include/linux/random.h | 2 -- mm/util.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 772f4536fc6ae..dfff00c43da88 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -624,38 +624,6 @@ int __cold random_prepare_cpu(unsigned int cpu) } #endif -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - /* * This function will use the architecture-specific hardware random * number generator if it is available. It is not recommended for diff --git a/include/linux/mm.h b/include/linux/mm.h index 04345ff97f8ca..85205adcdd0d1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2607,6 +2607,7 @@ extern int install_special_mapping(struct mm_struct *mm, unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); +unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/include/linux/random.h b/include/linux/random.h index d6ef6cf4346aa..917470c4490ac 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -64,8 +64,6 @@ static inline unsigned long get_random_canary(void) return get_random_long() & CANARY_MASK; } -unsigned long randomize_page(unsigned long start, unsigned long range); - int __init random_init(const char *command_line); bool rng_is_initialized(void); int wait_for_random_bytes(void); diff --git a/mm/util.c b/mm/util.c index ea09dd33ab594..3073de05c2bd4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -343,6 +343,38 @@ unsigned long randomize_stack_top(unsigned long stack_top) #endif } +/** + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} + #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT unsigned long arch_randomize_brk(struct mm_struct *mm) { -- GitLab From 6244da28c6b3f7b413e32197acf5bd06dbe2ecb5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 15 May 2022 00:22:05 +0200 Subject: [PATCH 0274/4335] random: unify batched entropy implementations commit 3092adcef3ffd2ef59634998297ca8358461ebce upstream. There are currently two separate batched entropy implementations, for u32 and u64, with nearly identical code, with the goal of avoiding unaligned memory accesses and letting the buffers be used more efficiently. Having to maintain these two functions independently is a bit of a hassle though, considering that they always need to be kept in sync. This commit factors them out into a type-generic macro, so that the expansion produces the same code as before, such that diffing the assembly shows no differences. This will also make it easier in the future to add u16 and u8 batches. This was initially tested using an always_inline function and letting gcc constant fold the type size in, but the code gen was less efficient, and in general it was more verbose and harder to follow. So this patch goes with the boring macro solution, similar to what's already done for the _wait functions in random.h. Cc: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 147 ++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 92 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index dfff00c43da88..c41e372955c38 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -511,99 +511,62 @@ out_zero_chacha: * provided by this function is okay, the function wait_for_random_bytes() * should be called and return 0 at least once at any point prior. */ -struct batched_entropy { - union { - /* - * We make this 1.5x a ChaCha block, so that we get the - * remaining 32 bytes from fast key erasure, plus one full - * block from the detached ChaCha state. We can increase - * the size of this later if needed so long as we keep the - * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. - */ - u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; - u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; - }; - local_lock_t lock; - unsigned long generation; - unsigned int position; -}; - - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), - .position = UINT_MAX -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - unsigned long next_gen; - - warn_unseeded_randomness(); - - if (!crng_ready()) { - _get_random_bytes(&ret, sizeof(ret)); - return ret; - } - - local_lock_irqsave(&batched_entropy_u64.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u64); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); - batch->position = 0; - batch->generation = next_gen; - } - ret = batch->entropy_u64[batch->position]; - batch->entropy_u64[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u64.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), - .position = UINT_MAX -}; - -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - unsigned long next_gen; - - warn_unseeded_randomness(); - - if (!crng_ready()) { - _get_random_bytes(&ret, sizeof(ret)); - return ret; - } - - local_lock_irqsave(&batched_entropy_u32.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u32); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u32[batch->position]; - batch->entropy_u32[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u32.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); +#define DEFINE_BATCHED_ENTROPY(type) \ +struct batch_ ##type { \ + /* \ + * We make this 1.5x a ChaCha block, so that we get the \ + * remaining 32 bytes from fast key erasure, plus one full \ + * block from the detached ChaCha state. We can increase \ + * the size of this later if needed so long as we keep the \ + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \ + */ \ + type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ + local_lock_t lock; \ + unsigned long generation; \ + unsigned int position; \ +}; \ + \ +static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ + .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ + .position = UINT_MAX \ +}; \ + \ +type get_random_ ##type(void) \ +{ \ + type ret; \ + unsigned long flags; \ + struct batch_ ##type *batch; \ + unsigned long next_gen; \ + \ + warn_unseeded_randomness(); \ + \ + if (!crng_ready()) { \ + _get_random_bytes(&ret, sizeof(ret)); \ + return ret; \ + } \ + \ + local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ + batch = raw_cpu_ptr(&batched_entropy_##type); \ + \ + next_gen = READ_ONCE(base_crng.generation); \ + if (batch->position >= ARRAY_SIZE(batch->entropy) || \ + next_gen != batch->generation) { \ + _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ + batch->position = 0; \ + batch->generation = next_gen; \ + } \ + \ + ret = batch->entropy[batch->position]; \ + batch->entropy[batch->position] = 0; \ + ++batch->position; \ + local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(get_random_ ##type); + +DEFINE_BATCHED_ENTROPY(u64) +DEFINE_BATCHED_ENTROPY(u32) #ifdef CONFIG_SMP /* -- GitLab From 7f8cea12a49425fe166fcd70fcbaa837803ab43d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 May 2022 17:31:36 -0600 Subject: [PATCH 0275/4335] random: convert to using fops->read_iter() commit 1b388e7765f2eaa137cf5d92b47ef5925ad83ced upstream. This is a pre-requisite to wiring up splice() again for the random and urandom drivers. It also allows us to remove the INT_MAX check in getrandom(), because import_single_range() applies capping internally. Signed-off-by: Jens Axboe [Jason: rewrote get_random_bytes_user() to simplify and also incorporate additional suggestions from Al.] Cc: Al Viro Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 66 ++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c41e372955c38..50af5da06a79e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -448,13 +449,13 @@ void get_random_bytes(void *buf, size_t len) } EXPORT_SYMBOL(get_random_bytes); -static ssize_t get_random_bytes_user(void __user *ubuf, size_t len) +static ssize_t get_random_bytes_user(struct iov_iter *iter) { - size_t block_len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; - u8 output[CHACHA_BLOCK_SIZE]; + u8 block[CHACHA_BLOCK_SIZE]; + size_t ret = 0, copied; - if (!len) + if (unlikely(!iov_iter_count(iter))) return 0; /* @@ -468,30 +469,22 @@ static ssize_t get_random_bytes_user(void __user *ubuf, size_t len) * use chacha_state after, so we can simply return those bytes to * the user directly. */ - if (len <= CHACHA_KEY_SIZE) { - ret = len - copy_to_user(ubuf, &chacha_state[4], len); + if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { + ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); goto out_zero_chacha; } for (;;) { - chacha20_block(chacha_state, output); + chacha20_block(chacha_state, block); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - block_len = min_t(size_t, len, CHACHA_BLOCK_SIZE); - left = copy_to_user(ubuf, output, block_len); - if (left) { - ret += block_len - left; - break; - } - - ubuf += block_len; - ret += block_len; - len -= block_len; - if (!len) + copied = copy_to_iter(block, sizeof(block), iter); + ret += copied; + if (!iov_iter_count(iter) || copied != sizeof(block)) break; - BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; @@ -499,7 +492,7 @@ static ssize_t get_random_bytes_user(void __user *ubuf, size_t len) } } - memzero_explicit(output, sizeof(output)); + memzero_explicit(block, sizeof(block)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); return ret ? ret : -EFAULT; @@ -1228,6 +1221,10 @@ static void __cold try_to_generate_entropy(void) SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { + struct iov_iter iter; + struct iovec iov; + int ret; + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; @@ -1238,19 +1235,18 @@ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; - if (len > INT_MAX) - len = INT_MAX; - if (!crng_ready() && !(flags & GRND_INSECURE)) { - int ret; - if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); if (unlikely(ret)) return ret; } - return get_random_bytes_user(ubuf, len); + + ret = import_single_range(READ, ubuf, len, &iov, &iter); + if (unlikely(ret)) + return ret; + return get_random_bytes_user(&iter); } static __poll_t random_poll(struct file *file, poll_table *wait) @@ -1294,8 +1290,7 @@ static ssize_t random_write(struct file *file, const char __user *ubuf, return (ssize_t)len; } -static ssize_t urandom_read(struct file *file, char __user *ubuf, - size_t len, loff_t *ppos) +static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { static int maxwarn = 10; @@ -1304,23 +1299,22 @@ static ssize_t urandom_read(struct file *file, char __user *ubuf, ++urandom_warning.missed; else if (ratelimit_disable || __ratelimit(&urandom_warning)) { --maxwarn; - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, len); + pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", + current->comm, iov_iter_count(iter)); } } - return get_random_bytes_user(ubuf, len); + return get_random_bytes_user(iter); } -static ssize_t random_read(struct file *file, char __user *ubuf, - size_t len, loff_t *ppos) +static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { int ret; ret = wait_for_random_bytes(); if (ret != 0) return ret; - return get_random_bytes_user(ubuf, len); + return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) @@ -1382,7 +1376,7 @@ static int random_fasync(int fd, struct file *filp, int on) } const struct file_operations random_fops = { - .read = random_read, + .read_iter = random_read_iter, .write = random_write, .poll = random_poll, .unlocked_ioctl = random_ioctl, @@ -1392,7 +1386,7 @@ const struct file_operations random_fops = { }; const struct file_operations urandom_fops = { - .read = urandom_read, + .read_iter = urandom_read_iter, .write = random_write, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, -- GitLab From 0789c69644c8fb3eb4427e6fd8efea183623b673 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 May 2022 17:43:15 -0600 Subject: [PATCH 0276/4335] random: convert to using fops->write_iter() commit 22b0a222af4df8ee9bb8e07013ab44da9511b047 upstream. Now that the read side has been converted to fix a regression with splice, convert the write side as well to have some symmetry in the interface used (and help deprecate ->write()). Signed-off-by: Jens Axboe [Jason: cleaned up random_ioctl a bit, require full writes in RNDADDENTROPY since it's crediting entropy, simplify control flow of write_pool(), and incorporate suggestions from Al.] Cc: Al Viro Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 67 ++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 50af5da06a79e..e950f2f28e3d3 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1255,39 +1255,31 @@ static __poll_t random_poll(struct file *file, poll_table *wait) return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } -static int write_pool(const char __user *ubuf, size_t len) +static ssize_t write_pool(struct iov_iter *iter) { - size_t block_len; - int ret = 0; u8 block[BLAKE2S_BLOCK_SIZE]; + ssize_t ret = 0; + size_t copied; - while (len) { - block_len = min(len, sizeof(block)); - if (copy_from_user(block, ubuf, block_len)) { - ret = -EFAULT; - goto out; - } - len -= block_len; - ubuf += block_len; - mix_pool_bytes(block, block_len); + if (unlikely(!iov_iter_count(iter))) + return 0; + + for (;;) { + copied = copy_from_iter(block, sizeof(block), iter); + ret += copied; + mix_pool_bytes(block, copied); + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; cond_resched(); } -out: memzero_explicit(block, sizeof(block)); - return ret; + return ret ? ret : -EFAULT; } -static ssize_t random_write(struct file *file, const char __user *ubuf, - size_t len, loff_t *ppos) +static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { - int ret; - - ret = write_pool(ubuf, len); - if (ret) - return ret; - - return (ssize_t)len; + return write_pool(iter); } static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) @@ -1319,9 +1311,8 @@ static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { - int size, ent_count; int __user *p = (int __user *)arg; - int retval; + int ent_count; switch (cmd) { case RNDGETENTCNT: @@ -1338,20 +1329,32 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EINVAL; credit_init_bits(ent_count); return 0; - case RNDADDENTROPY: + case RNDADDENTROPY: { + struct iov_iter iter; + struct iovec iov; + ssize_t ret; + int len; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; - if (get_user(size, p++)) + if (get_user(len, p++)) + return -EFAULT; + ret = import_single_range(WRITE, p, len, &iov, &iter); + if (unlikely(ret)) + return ret; + ret = write_pool(&iter); + if (unlikely(ret < 0)) + return ret; + /* Since we're crediting, enforce that it was all written into the pool. */ + if (unlikely(ret != len)) return -EFAULT; - retval = write_pool((const char __user *)p, size); - if (retval < 0) - return retval; credit_init_bits(ent_count); return 0; + } case RNDZAPENTCNT: case RNDCLEARPOOL: /* No longer has any effect. */ @@ -1377,7 +1380,7 @@ static int random_fasync(int fd, struct file *filp, int on) const struct file_operations random_fops = { .read_iter = random_read_iter, - .write = random_write, + .write_iter = random_write_iter, .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, @@ -1387,7 +1390,7 @@ const struct file_operations random_fops = { const struct file_operations urandom_fops = { .read_iter = urandom_read_iter, - .write = random_write, + .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, -- GitLab From 3e167570a951e4f390667a8c3e4e2ecc0f3ef35c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 May 2022 17:31:37 -0600 Subject: [PATCH 0277/4335] random: wire up fops->splice_{read,write}_iter() commit 79025e727a846be6fd215ae9cdb654368ac3f9a6 upstream. Now that random/urandom is using {read,write}_iter, we can wire it up to using the generic splice handlers. Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Signed-off-by: Jens Axboe [Jason: added the splice_write path. Note that sendfile() and such still does not work for read, though it does for write, because of a file type restriction in splice_direct_to_actor(), which I'll address separately.] Cc: Al Viro Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index e950f2f28e3d3..5ac332978c302 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1386,6 +1386,8 @@ const struct file_operations random_fops = { .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { @@ -1395,6 +1397,8 @@ const struct file_operations urandom_fops = { .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; -- GitLab From ea5b87349d5af47d67378e92a61c84865cb6b691 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 22 May 2022 22:25:41 +0200 Subject: [PATCH 0278/4335] random: check for signals after page of pool writes commit 1ce6c8d68f8ac587f54d0a271ac594d3d51f3efb upstream. get_random_bytes_user() checks for signals after producing a PAGE_SIZE worth of output, just like /dev/zero does. write_pool() is doing basically the same work (actually, slightly more expensive), and so should stop to check for signals in the same way. Let's also name it write_pool_user() to match get_random_bytes_user(), so this won't be misused in the future. Before this patch, massive writes to /dev/urandom would tie up the process for an extremely long time and make it unterminatable. After, it can be successfully interrupted. The following test program can be used to see this works as intended: #include #include #include #include static unsigned char x[~0U]; static void handle(int) { } int main(int argc, char *argv[]) { pid_t pid = getpid(), child; int fd; signal(SIGUSR1, handle); if (!(child = fork())) { for (;;) kill(pid, SIGUSR1); } fd = open("/dev/urandom", O_WRONLY); pause(); printf("interrupted after writing %zd bytes\n", write(fd, x, sizeof(x))); close(fd); kill(child, SIGTERM); return 0; } Result before: "interrupted after writing 2147479552 bytes" Result after: "interrupted after writing 4096 bytes" Cc: Dominik Brodowski Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5ac332978c302..ca17a658c2147 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1255,7 +1255,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait) return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } -static ssize_t write_pool(struct iov_iter *iter) +static ssize_t write_pool_user(struct iov_iter *iter) { u8 block[BLAKE2S_BLOCK_SIZE]; ssize_t ret = 0; @@ -1270,7 +1270,13 @@ static ssize_t write_pool(struct iov_iter *iter) mix_pool_bytes(block, copied); if (!iov_iter_count(iter) || copied != sizeof(block)) break; - cond_resched(); + + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) + break; + cond_resched(); + } } memzero_explicit(block, sizeof(block)); @@ -1279,7 +1285,7 @@ static ssize_t write_pool(struct iov_iter *iter) static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { - return write_pool(iter); + return write_pool_user(iter); } static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) @@ -1346,7 +1352,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) ret = import_single_range(WRITE, p, len, &iov, &iter); if (unlikely(ret)) return ret; - ret = write_pool(&iter); + ret = write_pool_user(&iter); if (unlikely(ret < 0)) return ret; /* Since we're crediting, enforce that it was all written into the pool. */ -- GitLab From 50196b5d73dcff0f9d060cd9b170ee987f4c7585 Mon Sep 17 00:00:00 2001 From: Edward Matijevic Date: Fri, 20 May 2022 23:45:15 -0500 Subject: [PATCH 0279/4335] ALSA: ctxfi: Add SB046x PCI ID commit 1b073ebb174d0c7109b438e0a5eb4495137803ec upstream. Adds the PCI ID for X-Fi cards sold under the Platnum and XtremeMusic names Before: snd_ctxfi 0000:05:05.0: chip 20K1 model Unknown (1102:0021) is found After: snd_ctxfi 0000:05:05.0: chip 20K1 model SB046x (1102:0021) is found [ This is only about defining the model name string, and the rest is handled just like before, as a default unknown device. Edward confirmed that the stuff has been working fine -- tiwai ] Signed-off-by: Edward Matijevic Cc: Link: https://lore.kernel.org/r/cae7d1a4-8bd9-7dfe-7427-db7e766f7272@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ctxfi/ctatc.c | 2 ++ sound/pci/ctxfi/cthardware.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index 78f35e88aed6b..fbdb8a3d5b8e5 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -36,6 +36,7 @@ | ((IEC958_AES3_CON_FS_48000) << 24)) static const struct snd_pci_quirk subsys_20k1_list[] = { + SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0021, "SB046x", CTSB046X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0022, "SB055x", CTSB055X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X), @@ -64,6 +65,7 @@ static const struct snd_pci_quirk subsys_20k2_list[] = { static const char *ct_subsys_name[NUM_CTCARDS] = { /* 20k1 models */ + [CTSB046X] = "SB046x", [CTSB055X] = "SB055x", [CTSB073X] = "SB073x", [CTUAA] = "UAA", diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h index f406b626a28c4..2875cec83b8f2 100644 --- a/sound/pci/ctxfi/cthardware.h +++ b/sound/pci/ctxfi/cthardware.h @@ -26,8 +26,9 @@ enum CHIPTYP { enum CTCARDS { /* 20k1 models */ + CTSB046X, + CT20K1_MODEL_FIRST = CTSB046X, CTSB055X, - CT20K1_MODEL_FIRST = CTSB055X, CTSB073X, CTUAA, CT20K1_UNKNOWN, -- GitLab From 4e67be407725b1d8b829ed2075987037abec98ec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 30 May 2022 09:29:18 +0200 Subject: [PATCH 0280/4335] Linux 5.15.44 Link: https://lore.kernel.org/r/20220527084850.364560116@linuxfoundation.org Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6192e6be49c36..b8ce2ba174862 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 43 +SUBLEVEL = 44 EXTRAVERSION = NAME = Trick or Treat -- GitLab From 09901136e79de0774d77e64147c86092fd511824 Mon Sep 17 00:00:00 2001 From: Forest Crossman Date: Tue, 3 May 2022 19:24:44 -0500 Subject: [PATCH 0281/4335] ALSA: usb-audio: Don't get sample rate for MCT Trigger 5 USB-to-HDMI [ Upstream commit d7be213849232a2accb219d537edf056d29186b4 ] This device doesn't support reading the sample rate, so we need to apply this quirk to avoid a 15-second delay waiting for three timeouts. Signed-off-by: Forest Crossman Link: https://lore.kernel.org/r/20220504002444.114011-2-cyrozap@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ab9f3da49941f..fbbe59054c3fb 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1822,6 +1822,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */ QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ -- GitLab From 8ff411998a333aa4ea07fe23d3ff4bba07445e2b Mon Sep 17 00:00:00 2001 From: IotaHydrae Date: Wed, 4 May 2022 19:59:04 +0800 Subject: [PATCH 0282/4335] pinctrl: sunxi: fix f1c100s uart2 function [ Upstream commit fa8785e5931367e2b43f2c507f26bcf3e281c0ca ] Change suniv f1c100s pinctrl,PD14 multiplexing function lvds1 to uart2 When the pin PD13 and PD14 is setting up to uart2 function in dts, there's an error occurred: 1c20800.pinctrl: unsupported function uart2 on pin PD14 Because 'uart2' is not any one multiplexing option of PD14, and pinctrl don't know how to configure it. So change the pin PD14 lvds1 function to uart2. Signed-off-by: IotaHydrae Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/tencent_70C1308DDA794C81CAEF389049055BACEC09@qq.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c index 2801ca7062732..68a5b627fb9b2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c +++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c @@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "lcd"), /* D20 */ - SUNXI_FUNCTION(0x3, "lvds1"), /* RX */ + SUNXI_FUNCTION(0x3, "uart2"), /* RX */ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15), SUNXI_FUNCTION(0x0, "gpio_in"), -- GitLab From 8243f5768dea24fe286a6323dc200225e7cff891 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 13 May 2022 09:26:07 +0000 Subject: [PATCH 0283/4335] KVM: arm64: Don't hypercall before EL2 init [ Upstream commit 2e40316753ee552fb598e8da8ca0d20a04e67453 ] Will reported the following splat when running with Protected KVM enabled: [ 2.427181] ------------[ cut here ]------------ [ 2.427668] WARNING: CPU: 3 PID: 1 at arch/arm64/kvm/mmu.c:489 __create_hyp_private_mapping+0x118/0x1ac [ 2.428424] Modules linked in: [ 2.429040] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.18.0-rc2-00084-g8635adc4efc7 #1 [ 2.429589] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2.430286] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.430734] pc : __create_hyp_private_mapping+0x118/0x1ac [ 2.431091] lr : create_hyp_exec_mappings+0x40/0x80 [ 2.431377] sp : ffff80000803baf0 [ 2.431597] x29: ffff80000803bb00 x28: 0000000000000000 x27: 0000000000000000 [ 2.432156] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [ 2.432561] x23: ffffcd96c343b000 x22: 0000000000000000 x21: ffff80000803bb40 [ 2.433004] x20: 0000000000000004 x19: 0000000000001800 x18: 0000000000000000 [ 2.433343] x17: 0003e68cf7efdd70 x16: 0000000000000004 x15: fffffc81f602a2c8 [ 2.434053] x14: ffffdf8380000000 x13: ffffcd9573200000 x12: ffffcd96c343b000 [ 2.434401] x11: 0000000000000004 x10: ffffcd96c1738000 x9 : 0000000000000004 [ 2.434812] x8 : ffff80000803bb40 x7 : 7f7f7f7f7f7f7f7f x6 : 544f422effff306b [ 2.435136] x5 : 000000008020001e x4 : ffff207d80a88c00 x3 : 0000000000000005 [ 2.435480] x2 : 0000000000001800 x1 : 000000014f4ab800 x0 : 000000000badca11 [ 2.436149] Call trace: [ 2.436600] __create_hyp_private_mapping+0x118/0x1ac [ 2.437576] create_hyp_exec_mappings+0x40/0x80 [ 2.438180] kvm_init_vector_slots+0x180/0x194 [ 2.458941] kvm_arch_init+0x80/0x274 [ 2.459220] kvm_init+0x48/0x354 [ 2.459416] arm_init+0x20/0x2c [ 2.459601] do_one_initcall+0xbc/0x238 [ 2.459809] do_initcall_level+0x94/0xb4 [ 2.460043] do_initcalls+0x54/0x94 [ 2.460228] do_basic_setup+0x1c/0x28 [ 2.460407] kernel_init_freeable+0x110/0x178 [ 2.460610] kernel_init+0x20/0x1a0 [ 2.460817] ret_from_fork+0x10/0x20 [ 2.461274] ---[ end trace 0000000000000000 ]--- Indeed, the Protected KVM mode promotes __create_hyp_private_mapping() to a hypercall as EL1 no longer has access to the hypervisor's stage-1 page-table. However, the call from kvm_init_vector_slots() happens after pKVM has been initialized on the primary CPU, but before it has been initialized on secondaries. As such, if the KVM initcall procedure is migrated from one CPU to another in this window, the hypercall may end up running on a CPU for which EL2 has not been initialized. Fortunately, the pKVM hypervisor doesn't rely on the host to re-map the vectors in the private range, so the hypercall in question is in fact superfluous. Skip it when pKVM is enabled. Reported-by: Will Deacon Signed-off-by: Quentin Perret [maz: simplified the checks slightly] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513092607.35233-1-qperret@google.com Signed-off-by: Sasha Levin --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0b2f684cd8ca5..a30c036577a32 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1458,7 +1458,8 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && + !is_protected_kvm_enabled()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) -- GitLab From 20b413c38b7c36b4251f55ec656644d10af59e2f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 May 2022 02:13:40 -0400 Subject: [PATCH 0284/4335] percpu_ref_init(): clean ->percpu_count_ref on failure [ Upstream commit a91714312eb16f9ecd1f7f8b3efe1380075f28d4 ] That way percpu_ref_exit() is safe after failing percpu_ref_init(). At least one user (cgroup_create()) had a double-free that way; there might be other similar bugs. Easier to fix in percpu_ref_init(), rather than playing whack-a-mole in sloppy users... Usual symptoms look like a messed refcounting in one of subsystems that use percpu allocations (might be percpu-refcount, might be something else). Having refcounts for two different objects share memory is Not Nice(tm)... Reported-by: syzbot+5b1e53987f858500ec00@syzkaller.appspotmail.com Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- lib/percpu-refcount.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index af9302141bcf6..e5c5315da2741 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, data = kzalloc(sizeof(*ref->data), gfp); if (!data) { free_percpu((void __percpu *)ref->percpu_count_ptr); + ref->percpu_count_ptr = 0; return -ENOMEM; } -- GitLab From 7e18fd12489b621fc7aad14e6da86f6c6daa5e92 Mon Sep 17 00:00:00 2001 From: Thomas Bartschies Date: Wed, 18 May 2022 08:32:18 +0200 Subject: [PATCH 0285/4335] net: af_key: check encryption module availability consistency [ Upstream commit 015c44d7bff3f44d569716117becd570c179ca32 ] Since the recent introduction supporting the SM3 and SM4 hash algos for IPsec, the kernel produces invalid pfkey acquire messages, when these encryption modules are disabled. This happens because the availability of the algos wasn't checked in all necessary functions. This patch adds these checks. Signed-off-by: Thomas Bartschies Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 92e9d75dba2f4..339d95df19d32 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2900,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2918,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg))) + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { @@ -2929,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } } -- GitLab From 49651497b637bdba252c7b8143f32fc87fd6e976 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 18 May 2022 18:53:21 +0800 Subject: [PATCH 0286/4335] nfc: pn533: Fix buggy cleanup order [ Upstream commit b8cedb7093b2d1394cae9b86494cba4b62d3a30a ] When removing the pn533 device (i2c or USB), there is a logic error. The original code first cancels the worker (flush_delayed_work) and then destroys the workqueue (destroy_workqueue), leaving the timer the last one to be deleted (del_timer). This result in a possible race condition in a multi-core preempt-able kernel. That is, if the cleanup (pn53x_common_clean) is concurrently run with the timer handler (pn533_listen_mode_timer), the timer can queue the poll_work to the already destroyed workqueue, causing use-after-free. This patch reorder the cleanup: it uses the del_timer_sync to make sure the handler is finished before the routine will destroy the workqueue. Note that the timer cannot be activated by the worker again. static void pn533_wq_poll(struct work_struct *work) ... rc = pn533_send_poll_frame(dev); if (rc) return; if (cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, ...); That is, the mod_timer can be called only when pn533_send_poll_frame() returns no error, which is impossible because the device is detaching and the lower driver should return ENODEV code. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/pn533/pn533.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index d32aec0c334fe..6dc0af63440f4 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2789,13 +2789,14 @@ void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; + /* delete the timer before cleanup the worker */ + del_timer_sync(&priv->listen_timer); + flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); skb_queue_purge(&priv->resp_q); - del_timer(&priv->listen_timer); - list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) { list_del(&cmd->queue); kfree(cmd); -- GitLab From 731561de2aeb2957ef55bb17d3d9cb39b9ded2ba Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 May 2022 18:52:17 +0930 Subject: [PATCH 0287/4335] net: ftgmac100: Disable hardware checksum on AST2600 [ Upstream commit 6fd45e79e8b93b8d22fb8fe22c32fbad7e9190bd ] The AST2600 when using the i210 NIC over NC-SI has been observed to produce incorrect checksum results with specific MTU values. This was first observed when sending data across a long distance set of networks. On a local network, the following test was performed using a 1MB file of random data. On the receiver run this script: #!/bin/bash while [ 1 ]; do # Zero the stats nstat -r > /dev/null nc -l 9899 > test-file # Check for checksum errors TcpInCsumErrors=$(nstat | grep TcpInCsumErrors) if [ -z "$TcpInCsumErrors" ]; then echo No TcpInCsumErrors else echo TcpInCsumErrors = $TcpInCsumErrors fi done On an AST2600 system: # nc 9899 < test-file The test was repeated with various MTU values: # ip link set mtu 1410 dev eth0 The observed results: 1500 - good 1434 - bad 1400 - good 1410 - bad 1420 - good The test was repeated after disabling tx checksumming: # ethtool -K eth0 tx-checksumming off And all MTU values tested resulted in transfers without error. An issue with the driver cannot be ruled out, however there has been no bug discovered so far. David has done the work to take the original bug report of slow data transfer between long distance connections and triaged it down to this test case. The vendor suspects this this is a hardware issue when using NC-SI. The fixes line refers to the patch that introduced AST2600 support. Reported-by: David Wilder Reviewed-by: Dylan Hung Signed-off-by: Joel Stanley Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftgmac100.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e1df2dc810a28..0b833572205f3 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1910,6 +1910,11 @@ static int ftgmac100_probe(struct platform_device *pdev) /* AST2400 doesn't have working HW checksum generation */ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac"))) netdev->hw_features &= ~NETIF_F_HW_CSUM; + + /* AST2600 tx checksum with NCSI is broken */ + if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac")) + netdev->hw_features &= ~NETIF_F_HW_CSUM; + if (np && of_get_property(np, "no-hw-checksum", NULL)) netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM); netdev->features |= netdev->hw_features; -- GitLab From 71475936e647c192d6c5321f1dc582c0f9573413 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 27 Apr 2022 13:19:10 +0300 Subject: [PATCH 0288/4335] i2c: ismt: Provide a DMA buffer for Interrupt Cause Logging [ Upstream commit 17a0f3acdc6ec8b89ad40f6e22165a4beee25663 ] Before sending a MSI the hardware writes information pertinent to the interrupt cause to a memory location pointed by SMTICL register. This memory holds three double words where the least significant bit tells whether the interrupt cause of master/target/error is valid. The driver does not use this but we need to set it up because otherwise it will perform DMA write to the default address (0) and this will cause an IOMMU fault such as below: DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:12.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set To prevent this from happening, provide a proper DMA buffer for this that then gets mapped by the IOMMU accordingly. Signed-off-by: Mika Westerberg Reviewed-by: From: Andy Shevchenko Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-ismt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index a6187cbec2c94..af2c240e064e1 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -82,6 +82,7 @@ #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ +#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */ /* Hardware Descriptor Constants - Control Field */ #define ISMT_DESC_CWRL 0x01 /* Command/Write Length */ @@ -175,6 +176,8 @@ struct ismt_priv { u8 head; /* ring buffer head pointer */ struct completion cmp; /* interrupt completion */ u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ + dma_addr_t log_dma; + u32 *log; }; static const struct pci_device_id ismt_ids[] = { @@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, memset(desc, 0, sizeof(struct ismt_desc)); desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write); + /* Always clear the log entries */ + memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32)); + /* Initialize common control bits */ if (likely(pci_dev_msi_enabled(priv->pci_dev))) desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR; @@ -708,6 +714,8 @@ static void ismt_hw_init(struct ismt_priv *priv) /* initialize the Master Descriptor Base Address (MDBA) */ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA); + writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL); + /* initialize the Master Control Register (MCTRL) */ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL); @@ -795,6 +803,12 @@ static int ismt_dev_init(struct ismt_priv *priv) priv->head = 0; init_completion(&priv->cmp); + priv->log = dmam_alloc_coherent(&priv->pci_dev->dev, + ISMT_LOG_ENTRIES * sizeof(u32), + &priv->log_dma, GFP_KERNEL); + if (!priv->log) + return -ENOMEM; + return 0; } -- GitLab From f55c75cf73c0854c8b24bbc97a45d78e786cd8dc Mon Sep 17 00:00:00 2001 From: Piyush Malgujar Date: Wed, 11 May 2022 06:36:59 -0700 Subject: [PATCH 0289/4335] drivers: i2c: thunderx: Allow driver to work with ACPI defined TWSI controllers [ Upstream commit 03a35bc856ddc09f2cc1f4701adecfbf3b464cb3 ] Due to i2c->adap.dev.fwnode not being set, ACPI_COMPANION() wasn't properly found for TWSI controllers. Signed-off-by: Szymon Balcerak Signed-off-by: Piyush Malgujar Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-thunderx-pcidrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c index 12c90aa0900e6..a77cd86fe75ed 100644 --- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c +++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c @@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev, i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info; i2c->adap.dev.parent = dev; i2c->adap.dev.of_node = pdev->dev.of_node; + i2c->adap.dev.fwnode = dev->fwnode; snprintf(i2c->adap.name, sizeof(i2c->adap.name), "Cavium ThunderX i2c adapter at %s", dev_name(dev)); i2c_set_adapdata(&i2c->adap, i2c); -- GitLab From f692bcffd1f2ce5488d24fbcb8eab5f351abf79d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 May 2022 10:36:38 +0200 Subject: [PATCH 0290/4335] netfilter: nf_tables: disallow non-stateful expression in sets earlier commit 520778042ccca019f3ffa136dd0ca565c486cedd upstream. Since 3e135cd499bf ("netfilter: nft_dynset: dynamic stateful expression instantiation"), it is possible to attach stateful expressions to set elements. cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") introduces conditional destruction on the object to accomodate transaction semantics. nft_expr_init() calls expr->ops->init() first, then check for NFT_STATEFUL_EXPR, this stills allows to initialize a non-stateful lookup expressions which points to a set, which might lead to UAF since the set is not properly detached from the set->binding for this case. Anyway, this combination is non-sense from nf_tables perspective. This patch fixes this problem by checking for NFT_STATEFUL_EXPR before expr->ops->init() is called. The reporter provides a KASAN splat and a poc reproducer (similar to those autogenerated by syzbot to report use-after-free errors). It is unknown to me if they are using syzbot or if they use similar automated tool to locate the bug that they are reporting. For the record, this is the KASAN splat. [ 85.431824] ================================================================== [ 85.432901] BUG: KASAN: use-after-free in nf_tables_bind_set+0x81b/0xa20 [ 85.433825] Write of size 8 at addr ffff8880286f0e98 by task poc/776 [ 85.434756] [ 85.434999] CPU: 1 PID: 776 Comm: poc Tainted: G W 5.18.0+ #2 [ 85.436023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Fixes: 0b2d8a7b638b ("netfilter: nf_tables: add helper functions for expression handling") Reported-and-tested-by: Aaron Adams Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2feb88ffcd81f..91865dc2bcbb8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2778,27 +2778,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &expr_info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(expr_info.ops->size, GFP_KERNEL); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &expr_info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = expr_info.ops->type->owner; if (expr_info.ops->type->release_ops) expr_info.ops->type->release_ops(expr_info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -5318,9 +5322,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return expr; err = -EOPNOTSUPP; - if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err_set_elem_expr; - if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; -- GitLab From 24c6fc6e7453f64cf6cbb4218c62aafdecc16ee1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Jun 2022 14:02:18 +0300 Subject: [PATCH 0291/4335] i2c: ismt: prevent memory corruption in ismt_access() commit 690b2549b19563ec5ad53e5c82f6a944d910086e upstream. The "data->block[0]" variable comes from the user and is a number between 0-255. It needs to be capped to prevent writing beyond the end of dma_buffer[]. Fixes: 5e9a97b1f449 ("i2c: ismt: Adding support for I2C_SMBUS_BLOCK_PROC_CALL") Reported-and-tested-by: Zheyu Ma Signed-off-by: Dan Carpenter Reviewed-by: Mika Westerberg Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-ismt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index af2c240e064e1..483428c5e30b9 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -528,6 +528,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, case I2C_SMBUS_BLOCK_PROC_CALL: dev_dbg(dev, "I2C_SMBUS_BLOCK_PROC_CALL\n"); + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + dma_size = I2C_SMBUS_BLOCK_MAX; desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, 1); desc->wr_len_cmd = data->block[0] + 1; -- GitLab From 8a3db00ab0e20346ebcae58968a270350b164df1 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 19 May 2022 09:50:30 +0100 Subject: [PATCH 0292/4335] assoc_array: Fix BUG_ON during garbage collect commit d1dc87763f406d4e67caf16dbe438a5647692395 upstream. A rare BUG_ON triggered in assoc_array_gc: [3430308.818153] kernel BUG at lib/assoc_array.c:1609! Which corresponded to the statement currently at line 1593 upstream: BUG_ON(assoc_array_ptr_is_meta(p)); Using the data from the core dump, I was able to generate a userspace reproducer[1] and determine the cause of the bug. [1]: https://github.com/brenns10/kernel_stuff/tree/master/assoc_array_gc After running the iterator on the entire branch, an internal tree node looked like the following: NODE (nr_leaves_on_branch: 3) SLOT [0] NODE (2 leaves) SLOT [1] NODE (1 leaf) SLOT [2..f] NODE (empty) In the userspace reproducer, the pr_devel output when compressing this node was: -- compress node 0x5607cc089380 -- free=0, leaves=0 [0] retain node 2/1 [nx 0] [1] fold node 1/1 [nx 0] [2] fold node 0/1 [nx 2] [3] fold node 0/2 [nx 2] [4] fold node 0/3 [nx 2] [5] fold node 0/4 [nx 2] [6] fold node 0/5 [nx 2] [7] fold node 0/6 [nx 2] [8] fold node 0/7 [nx 2] [9] fold node 0/8 [nx 2] [10] fold node 0/9 [nx 2] [11] fold node 0/10 [nx 2] [12] fold node 0/11 [nx 2] [13] fold node 0/12 [nx 2] [14] fold node 0/13 [nx 2] [15] fold node 0/14 [nx 2] after: 3 At slot 0, an internal node with 2 leaves could not be folded into the node, because there was only one available slot (slot 0). Thus, the internal node was retained. At slot 1, the node had one leaf, and was able to be folded in successfully. The remaining nodes had no leaves, and so were removed. By the end of the compression stage, there were 14 free slots, and only 3 leaf nodes. The tree was ascended and then its parent node was compressed. When this node was seen, it could not be folded, due to the internal node it contained. The invariant for compression in this function is: whenever nr_leaves_on_branch < ASSOC_ARRAY_FAN_OUT, the node should contain all leaf nodes. The compression step currently cannot guarantee this, given the corner case shown above. To fix this issue, retry compression whenever we have retained a node, and yet nr_leaves_on_branch < ASSOC_ARRAY_FAN_OUT. This second compression will then allow the node in slot 1 to be folded in, satisfying the invariant. Below is the output of the reproducer once the fix is applied: -- compress node 0x560e9c562380 -- free=0, leaves=0 [0] retain node 2/1 [nx 0] [1] fold node 1/1 [nx 0] [2] fold node 0/1 [nx 2] [3] fold node 0/2 [nx 2] [4] fold node 0/3 [nx 2] [5] fold node 0/4 [nx 2] [6] fold node 0/5 [nx 2] [7] fold node 0/6 [nx 2] [8] fold node 0/7 [nx 2] [9] fold node 0/8 [nx 2] [10] fold node 0/9 [nx 2] [11] fold node 0/10 [nx 2] [12] fold node 0/11 [nx 2] [13] fold node 0/12 [nx 2] [14] fold node 0/13 [nx 2] [15] fold node 0/14 [nx 2] internal nodes remain despite enough space, retrying -- compress node 0x560e9c562380 -- free=14, leaves=1 [0] fold node 2/15 [nx 0] after: 3 Changes ======= DH: - Use false instead of 0. - Reorder the inserted lines in a couple of places to put retained before next_slot. ver #2) - Fix typo in pr_devel, correct comparison to "<=" Fixes: 3cb989501c26 ("Add a generic associative array implementation.") Cc: Signed-off-by: Stephen Brennan Signed-off-by: David Howells cc: Andrew Morton cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20220511225517.407935-1-stephen.s.brennan@oracle.com/ # v1 Link: https://lore.kernel.org/r/20220512215045.489140-1-stephen.s.brennan@oracle.com/ # v2 Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/assoc_array.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 04c98799c3baf..70304b8f15ace 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1462,6 +1462,7 @@ int assoc_array_gc(struct assoc_array *array, struct assoc_array_ptr *cursor, *ptr; struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; unsigned long nr_leaves_on_tree; + bool retained; int keylen, slot, nr_free, next_slot, i; pr_devel("-->%s()\n", __func__); @@ -1538,6 +1539,7 @@ continue_node: goto descend; } +retry_compress: pr_devel("-- compress node %p --\n", new_n); /* Count up the number of empty slots in this node and work out the @@ -1555,6 +1557,7 @@ continue_node: pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); /* See what we can fold in */ + retained = false; next_slot = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_shortcut *s; @@ -1604,9 +1607,14 @@ continue_node: pr_devel("[%d] retain node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); + retained = true; } } + if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + pr_devel("internal nodes remain despite enough space, retrying\n"); + goto retry_compress; + } pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); nr_leaves_on_tree = new_n->nr_leaves_on_branch; -- GitLab From e6acf868ff0e09efd0a4bee109989640ac792ccf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 29 Apr 2022 14:38:01 -0700 Subject: [PATCH 0293/4335] pipe: make poll_usage boolean and annotate its access commit f485922d8fe4e44f6d52a5bb95a603b7c65554bb upstream. Patch series "Fix data-races around epoll reported by KCSAN." This series suppresses a false positive KCSAN's message and fixes a real data-race. This patch (of 2): pipe_poll() runs locklessly and assigns 1 to poll_usage. Once poll_usage is set to 1, it never changes in other places. However, concurrent writes of a value trigger KCSAN, so let's make KCSAN happy. BUG: KCSAN: data-race in pipe_poll / pipe_poll write to 0xffff8880042f6678 of 4 bytes by task 174 on cpu 3: pipe_poll (fs/pipe.c:656) ep_item_poll.isra.0 (./include/linux/poll.h:88 fs/eventpoll.c:853) do_epoll_wait (fs/eventpoll.c:1692 fs/eventpoll.c:1806 fs/eventpoll.c:2234) __x64_sys_epoll_wait (fs/eventpoll.c:2246 fs/eventpoll.c:2241 fs/eventpoll.c:2241) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113) write to 0xffff8880042f6678 of 4 bytes by task 177 on cpu 1: pipe_poll (fs/pipe.c:656) ep_item_poll.isra.0 (./include/linux/poll.h:88 fs/eventpoll.c:853) do_epoll_wait (fs/eventpoll.c:1692 fs/eventpoll.c:1806 fs/eventpoll.c:2234) __x64_sys_epoll_wait (fs/eventpoll.c:2246 fs/eventpoll.c:2241 fs/eventpoll.c:2241) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113) Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 177 Comm: epoll_race Not tainted 5.17.0-58927-gf443e374ae13 #6 Hardware name: Red Hat KVM, BIOS 1.11.0-2.amzn2 04/01/2014 Link: https://lkml.kernel.org/r/20220322002653.33865-1-kuniyu@amazon.co.jp Link: https://lkml.kernel.org/r/20220322002653.33865-2-kuniyu@amazon.co.jp Fixes: 3b844826b6c6 ("pipe: avoid unnecessary EPOLLET wakeups under normal loads") Signed-off-by: Kuniyuki Iwashima Cc: Alexander Duyck Cc: Al Viro Cc: Davidlohr Bueso Cc: Kuniyuki Iwashima Cc: "Soheil Hassas Yeganeh" Cc: "Sridhar Samudrala" Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 2 +- include/linux/pipe_fs_i.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 751d5b36c84bb..a00babcfc67c9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -652,7 +652,7 @@ pipe_poll(struct file *filp, poll_table *wait) unsigned int head, tail; /* Epoll has some historical nasty semantics, this enables them */ - pipe->poll_usage = 1; + WRITE_ONCE(pipe->poll_usage, true); /* * Reading pipe state only -- no need for acquiring the semaphore. diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index fc5642431b923..c0b6ec6bf65b7 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -71,7 +71,7 @@ struct pipe_inode_info { unsigned int files; unsigned int r_counter; unsigned int w_counter; - unsigned int poll_usage; + bool poll_usage; struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; -- GitLab From cf2fbc56c478a34a68ff1fa6ad08460054dfd499 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 May 2022 07:34:52 +0100 Subject: [PATCH 0294/4335] pipe: Fix missing lock in pipe_resize_ring() commit 189b0ddc245139af81198d1a3637cac74f96e13a upstream. pipe_resize_ring() needs to take the pipe->rd_wait.lock spinlock to prevent post_one_notification() from trying to insert into the ring whilst the ring is being replaced. The occupancy check must be done after the lock is taken, and the lock must be taken after the new ring is allocated. The bug can lead to an oops looking something like: BUG: KASAN: use-after-free in post_one_notification.isra.0+0x62e/0x840 Read of size 4 at addr ffff88801cc72a70 by task poc/27196 ... Call Trace: post_one_notification.isra.0+0x62e/0x840 __post_watch_notification+0x3b7/0x650 key_create_or_update+0xb8b/0xd20 __do_sys_add_key+0x175/0x340 __x64_sys_add_key+0xbe/0x140 do_syscall_64+0x5c/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported by Selim Enes Karaduman @Enesdex working with Trend Micro Zero Day Initiative. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17291 Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index a00babcfc67c9..e08f0fe55584b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1244,30 +1244,33 @@ unsigned int round_pipe_size(unsigned long size) /* * Resize the pipe ring to a number of slots. + * + * Note the pipe can be reduced in capacity, but only if the current + * occupancy doesn't exceed nr_slots; if it does, EBUSY will be + * returned instead. */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) { struct pipe_buffer *bufs; unsigned int head, tail, mask, n; - /* - * We can shrink the pipe, if arg is greater than the ring occupancy. - * Since we don't expect a lot of shrink+grow operations, just free and - * allocate again like we would do for growing. If the pipe currently - * contains more buffers than arg, then return busy. - */ - mask = pipe->ring_size - 1; - head = pipe->head; - tail = pipe->tail; - n = pipe_occupancy(pipe->head, pipe->tail); - if (nr_slots < n) - return -EBUSY; - bufs = kcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (unlikely(!bufs)) return -ENOMEM; + spin_lock_irq(&pipe->rd_wait.lock); + mask = pipe->ring_size - 1; + head = pipe->head; + tail = pipe->tail; + + n = pipe_occupancy(head, tail); + if (nr_slots < n) { + spin_unlock_irq(&pipe->rd_wait.lock); + kfree(bufs); + return -EBUSY; + } + /* * The pipe array wraps around, so just start the new one at zero * and adjust the indices. @@ -1299,6 +1302,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) pipe->tail = tail; pipe->head = head; + spin_unlock_irq(&pipe->rd_wait.lock); + /* This might have made more room for writers */ wake_up_interruptible(&pipe->wr_wait); return 0; -- GitLab From 23cb9eff90b154716693d2a406919decf8a8bd8a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Apr 2022 13:53:33 -0500 Subject: [PATCH 0295/4335] net: ipa: compute proper aggregation limit commit c5794097b269f15961ed78f7f27b50e51766dec9 upstream. The aggregation byte limit for an endpoint is currently computed based on the endpoint's receive buffer size. However, some bytes at the front of each receive buffer are reserved on the assumption that--as with SKBs--it might be useful to insert data (such as headers) before what lands in the buffer. The aggregation byte limit currently doesn't take into account that reserved space, and as a result, aggregation could require space past that which is available in the buffer. Fix this by reducing the size used to compute the aggregation byte limit by the NET_SKB_PAD offset reserved for each receive buffer. Signed-off-by: Alex Elder Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipa/ipa_endpoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 87e42db1b61e6..477eb4051bed7 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -722,13 +722,15 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint) if (endpoint->data->aggregation) { if (!endpoint->toward_ipa) { + u32 buffer_size; bool close_eof; u32 limit; val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK); val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK); - limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE); + buffer_size = IPA_RX_BUFFER_SIZE - NET_SKB_PAD; + limit = ipa_aggr_size_kb(buffer_size); val |= aggr_byte_limit_encoded(version, limit); limit = IPA_AGGR_TIME_LIMIT; -- GitLab From 195fffbf8291a84580762ac6e3101489954d0216 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 27 Apr 2022 17:47:14 -0500 Subject: [PATCH 0296/4335] drm/i915: Fix -Wstringop-overflow warning in call to intel_read_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 336feb502a715909a8136eb6a62a83d7268a353b upstream. Fix the following -Wstringop-overflow warnings when building with GCC-11: drivers/gpu/drm/i915/intel_pm.c:3106:9: warning: ‘intel_read_wm_latency’ accessing 16 bytes in a region of size 10 [-Wstringop-overflow=] 3106 | intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/intel_pm.c:3106:9: note: referencing argument 2 of type ‘u16 *’ {aka ‘short unsigned int *’} drivers/gpu/drm/i915/intel_pm.c:2861:13: note: in a call to function ‘intel_read_wm_latency’ 2861 | static void intel_read_wm_latency(struct drm_i915_private *dev_priv, | ^~~~~~~~~~~~~~~~~~~~~ by removing the over-specified array size from the argument declarations. It seems that this code is actually safe because the size of the array depends on the hardware generation, and the function checks for that. Notice that wm can be an array of 5 elements: drivers/gpu/drm/i915/intel_pm.c:3109: intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); or an array of 8 elements: drivers/gpu/drm/i915/intel_pm.c:3131: intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); and the compiler legitimately complains about that. This helps with the ongoing efforts to globally enable -Wstringop-overflow. Link: https://github.com/KSPP/linux/issues/181 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 57c1dda76b94a..1a27e4833adfa 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2863,7 +2863,7 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, } static void intel_read_wm_latency(struct drm_i915_private *dev_priv, - u16 wm[8]) + u16 wm[]) { struct intel_uncore *uncore = &dev_priv->uncore; -- GitLab From c504167adc3248095a905fa0700a9693897cb5ed Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 17 May 2022 08:13:08 +0900 Subject: [PATCH 0297/4335] exfat: check if cluster num is valid commit 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d upstream. Syzbot reported slab-out-of-bounds read in exfat_clear_bitmap. This was triggered by reproducer calling truncute with size 0, which causes the following trace: BUG: KASAN: slab-out-of-bounds in exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 Read of size 8 at addr ffff888115aa9508 by task syz-executor251/365 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack_lvl+0x1e2/0x24b lib/dump_stack.c:118 print_address_description+0x81/0x3c0 mm/kasan/report.c:233 __kasan_report mm/kasan/report.c:419 [inline] kasan_report+0x1a4/0x1f0 mm/kasan/report.c:436 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:309 exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 exfat_free_cluster+0x25a/0x4a0 fs/exfat/fatent.c:181 __exfat_truncate+0x99e/0xe00 fs/exfat/file.c:217 exfat_truncate+0x11b/0x4f0 fs/exfat/file.c:243 exfat_setattr+0xa03/0xd40 fs/exfat/file.c:339 notify_change+0xb76/0xe10 fs/attr.c:336 do_truncate+0x1ea/0x2d0 fs/open.c:65 Move the is_valid_cluster() helper from fatent.c to a common header to make it reusable in other *.c files. And add is_valid_cluster() to validate if cluster number is within valid range in exfat_clear_bitmap() and exfat_set_bitmap(). Link: https://syzkaller.appspot.com/bug?id=50381fc73821ecae743b8cf24b4c9a04776f767c Reported-by: syzbot+a4087e40b9c13aad7892@syzkaller.appspotmail.com Fixes: 1e49a94cf707 ("exfat: add bitmap operations") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Tadeusz Struk Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/exfat/balloc.c | 8 ++++++-- fs/exfat/exfat_fs.h | 8 ++++++++ fs/exfat/fatent.c | 8 -------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index cc5cffc4a7691..e2113e0a848c4 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); - WARN_ON(clu < EXFAT_FIRST_CLUSTER); + if (!is_valid_cluster(sbi, clu)) + return -EINVAL; + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); @@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_mount_options *opts = &sbi->options; - WARN_ON(clu < EXFAT_FIRST_CLUSTER); + if (!is_valid_cluster(sbi, clu)) + return; + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 1d6da61157c93..9f82a8a835eec 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -381,6 +381,14 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, EXFAT_RESERVED_CLUSTERS; } +static inline bool is_valid_cluster(struct exfat_sb_info *sbi, + unsigned int clus) +{ + if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus) + return false; + return true; +} + /* super.c */ int exfat_set_volume_dirty(struct super_block *sb); int exfat_clear_volume_dirty(struct super_block *sb); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index e949e563443c9..421c273531049 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -81,14 +81,6 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc, return 0; } -static inline bool is_valid_cluster(struct exfat_sb_info *sbi, - unsigned int clus) -{ - if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus) - return false; - return true; -} - int exfat_ent_get(struct super_block *sb, unsigned int loc, unsigned int *content) { -- GitLab From e16cc79b0f916069de223bdb567fa0bc2ccd18a5 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Thu, 2 Jun 2022 22:23:23 +0200 Subject: [PATCH 0298/4335] lib/crypto: add prompts back to crypto libraries commit e56e18985596617ae426ed5997fb2e737cffb58b upstream. Commit 6048fdcc5f269 ("lib/crypto: blake2s: include as built-in") took away a number of prompt texts from other crypto libraries. This makes values flip from built-in to module when oldconfig runs, and causes problems when these crypto libs need to be built in for thingslike BIG_KEYS. Fixes: 6048fdcc5f269 ("lib/crypto: blake2s: include as built-in") Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Signed-off-by: Justin M. Forbes [Jason: - moved menu into submenu of lib/ instead of root menu - fixed chacha sub-dependencies for CONFIG_CRYPTO] Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/Kconfig | 2 -- lib/Kconfig | 2 ++ lib/crypto/Kconfig | 17 ++++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 55718de561375..a346b6f74bb39 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1924,5 +1924,3 @@ source "crypto/asymmetric_keys/Kconfig" source "certs/Kconfig" endif # if CRYPTO - -source "lib/crypto/Kconfig" diff --git a/lib/Kconfig b/lib/Kconfig index fa4b10322efcd..e052f843afedc 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -121,6 +121,8 @@ config INDIRECT_IOMEM_FALLBACK mmio accesses when the IO memory address is not a registered emulated region. +source "lib/crypto/Kconfig" + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 8620f38e117c0..e8e525650cf29 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +menu "Crypto library routines" + config CRYPTO_LIB_AES tristate @@ -31,7 +33,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select CRYPTO_ALGAPI + select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a @@ -40,7 +42,8 @@ config CRYPTO_LIB_CHACHA_GENERIC of CRYPTO_LIB_CHACHA. config CRYPTO_LIB_CHACHA - tristate + tristate "ChaCha library interface" + depends on CRYPTO depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -65,7 +68,7 @@ config CRYPTO_LIB_CURVE25519_GENERIC of CRYPTO_LIB_CURVE25519. config CRYPTO_LIB_CURVE25519 - tristate + tristate "Curve25519 scalar multiplication library" depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n help @@ -100,7 +103,7 @@ config CRYPTO_LIB_POLY1305_GENERIC of CRYPTO_LIB_POLY1305. config CRYPTO_LIB_POLY1305 - tristate + tristate "Poly1305 library interface" depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n help @@ -109,14 +112,18 @@ config CRYPTO_LIB_POLY1305 is available and enabled. config CRYPTO_LIB_CHACHA20POLY1305 - tristate + tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 + depends on CRYPTO select CRYPTO_LIB_CHACHA select CRYPTO_LIB_POLY1305 + select CRYPTO_ALGAPI config CRYPTO_LIB_SHA256 tristate config CRYPTO_LIB_SM4 tristate + +endmenu -- GitLab From fa996803b9d6be9d7cf694e7a61d5133df982d27 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 2 Jun 2022 22:23:24 +0200 Subject: [PATCH 0299/4335] crypto: drbg - prepare for more fine-grained tracking of seeding state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce8ce31b2c5c8b18667784b8c515650c65d57b4e upstream. There are two different randomness sources the DRBGs are getting seeded from, namely the jitterentropy source (if enabled) and get_random_bytes(). At initial DRBG seeding time during boot, the latter might not have collected sufficient entropy for seeding itself yet and thus, the DRBG implementation schedules a reseed work from a random_ready_callback once that has happened. This is particularly important for the !->pr DRBG instances, for which (almost) no further reseeds are getting triggered during their lifetime. Because collecting data from the jitterentropy source is a rather expensive operation, the aforementioned asynchronously scheduled reseed work restricts itself to get_random_bytes() only. That is, it in some sense amends the initial DRBG seed derived from jitterentropy output at full (estimated) entropy with fresh randomness obtained from get_random_bytes() once that has been seeded with sufficient entropy itself. With the advent of rng_is_initialized(), there is no real need for doing the reseed operation from an asynchronously scheduled work anymore and a subsequent patch will make it synchronous by moving it next to related logic already present in drbg_generate(). However, for tracking whether a full reseed including the jitterentropy source is required or a "partial" reseed involving only get_random_bytes() would be sufficient already, the boolean struct drbg_state's ->seeded member must become a tristate value. Prepare for this by introducing the new enum drbg_seed_state and change struct drbg_state's ->seeded member's type from bool to that type. For facilitating review, enum drbg_seed_state is made to only contain two members corresponding to the former ->seeded values of false and true resp. at this point: DRBG_SEED_STATE_UNSEEDED and DRBG_SEED_STATE_FULL. A third one for tracking the intermediate state of "seeded from jitterentropy only" will be introduced with a subsequent patch. There is no change in behaviour at this point. Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/drbg.c | 19 ++++++++++--------- include/crypto/drbg.h | 7 ++++++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 03c9ef768c227..35358119fcfb9 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1043,7 +1043,7 @@ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, if (ret) return ret; - drbg->seeded = true; + drbg->seeded = DRBG_SEED_STATE_FULL; /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; @@ -1088,14 +1088,14 @@ static void drbg_async_seed(struct work_struct *work) if (ret) goto unlock; - /* Set seeded to false so that if __drbg_seed fails the - * next generate call will trigger a reseed. + /* Reset ->seeded so that if __drbg_seed fails the next + * generate call will trigger a reseed. */ - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; __drbg_seed(drbg, &seedlist, true); - if (drbg->seeded) + if (drbg->seeded == DRBG_SEED_STATE_FULL) drbg->reseed_threshold = drbg_max_requests(drbg); unlock: @@ -1386,13 +1386,14 @@ static int drbg_generate(struct drbg_state *drbg, * here. The spec is a bit convoluted here, we make it simpler. */ if (drbg->reseed_threshold < drbg->reseed_ctr) - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; - if (drbg->pr || !drbg->seeded) { + if (drbg->pr || drbg->seeded == DRBG_SEED_STATE_UNSEEDED) { pr_devel("DRBG: reseeding before generation (prediction " "resistance: %s, state %s)\n", drbg->pr ? "true" : "false", - drbg->seeded ? "seeded" : "unseeded"); + (drbg->seeded == DRBG_SEED_STATE_FULL ? + "seeded" : "unseeded")); /* 9.3.1 steps 7.1 through 7.3 */ len = drbg_seed(drbg, addtl, true); if (len) @@ -1577,7 +1578,7 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, if (!drbg->core) { drbg->core = &drbg_cores[coreref]; drbg->pr = pr; - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; drbg->reseed_threshold = drbg_max_requests(drbg); ret = drbg_alloc_state(drbg); diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 88e4d145f7cda..2db72121d5680 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -105,6 +105,11 @@ struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; +enum drbg_seed_state { + DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_FULL, +}; + struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ @@ -127,7 +132,7 @@ struct drbg_state { struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - bool seeded; /* DRBG fully seeded? */ + enum drbg_seed_state seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ -- GitLab From 585f6b76d354185493dbb391a5575d8001873054 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 2 Jun 2022 22:23:25 +0200 Subject: [PATCH 0300/4335] crypto: drbg - track whether DRBG was seeded with !rng_is_initialized() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2bcd25443868aa8863779a6ebc6c9319633025d2 upstream. Currently, the DRBG implementation schedules asynchronous works from random_ready_callbacks for reseeding the DRBG instances with output from get_random_bytes() once the latter has sufficient entropy available. However, as the get_random_bytes() initialization state can get queried by means of rng_is_initialized() now, there is no real need for this asynchronous reseeding logic anymore and it's better to keep things simple by doing it synchronously when needed instead, i.e. from drbg_generate() once rng_is_initialized() has flipped to true. Of course, for this to work, drbg_generate() would need some means by which it can tell whether or not rng_is_initialized() has flipped to true since the last seeding from get_random_bytes(). Or equivalently, whether or not the last seed from get_random_bytes() has happened when rng_is_initialized() was still evaluating to false. As it currently stands, enum drbg_seed_state allows for the representation of two different DRBG seeding states: DRBG_SEED_STATE_UNSEEDED and DRBG_SEED_STATE_FULL. The former makes drbg_generate() to invoke a full reseeding operation involving both, the rather expensive jitterentropy as well as the get_random_bytes() randomness sources. The DRBG_SEED_STATE_FULL state on the other hand implies that no reseeding at all is required for a !->pr DRBG variant. Introduce the new DRBG_SEED_STATE_PARTIAL state to enum drbg_seed_state for representing the condition that a DRBG was being seeded when rng_is_initialized() had still been false. In particular, this new state implies that - the given DRBG instance has been fully seeded from the jitterentropy source (if enabled) - and drbg_generate() is supposed to reseed from get_random_bytes() *only* once rng_is_initialized() turns to true. Up to now, the __drbg_seed() helper used to set the given DRBG instance's ->seeded state to constant DRBG_SEED_STATE_FULL. Introduce a new argument allowing for the specification of the to be written ->seeded value instead. Make the first of its two callers, drbg_seed(), determine the appropriate value based on rng_is_initialized(). The remaining caller, drbg_async_seed(), is known to get invoked only once rng_is_initialized() is true, hence let it pass constant DRBG_SEED_STATE_FULL for the new argument to __drbg_seed(). There is no change in behaviour, except for that the pr_devel() in drbg_generate() would now report "unseeded" for ->pr DRBG instances which had last been seeded when rng_is_initialized() was still evaluating to false. Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/drbg.c | 12 ++++++++---- include/crypto/drbg.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 35358119fcfb9..bc5a533c09601 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1036,14 +1036,14 @@ static const struct drbg_state_ops drbg_hash_ops = { ******************************************************************/ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, - int reseed) + int reseed, enum drbg_seed_state new_seed_state) { int ret = drbg->d_ops->update(drbg, seed, reseed); if (ret) return ret; - drbg->seeded = DRBG_SEED_STATE_FULL; + drbg->seeded = new_seed_state; /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; @@ -1093,7 +1093,7 @@ static void drbg_async_seed(struct work_struct *work) */ drbg->seeded = DRBG_SEED_STATE_UNSEEDED; - __drbg_seed(drbg, &seedlist, true); + __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); if (drbg->seeded == DRBG_SEED_STATE_FULL) drbg->reseed_threshold = drbg_max_requests(drbg); @@ -1123,6 +1123,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, unsigned int entropylen = drbg_sec_strength(drbg->core->flags); struct drbg_string data1; LIST_HEAD(seedlist); + enum drbg_seed_state new_seed_state = DRBG_SEED_STATE_FULL; /* 9.1 / 9.2 / 9.3.1 step 3 */ if (pers && pers->len > (drbg_max_addtl(drbg))) { @@ -1150,6 +1151,9 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, BUG_ON((entropylen * 2) > sizeof(entropy)); /* Get seed from in-kernel /dev/urandom */ + if (!rng_is_initialized()) + new_seed_state = DRBG_SEED_STATE_PARTIAL; + ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) goto out; @@ -1206,7 +1210,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, memset(drbg->C, 0, drbg_statelen(drbg)); } - ret = __drbg_seed(drbg, &seedlist, reseed); + ret = __drbg_seed(drbg, &seedlist, reseed, new_seed_state); out: memzero_explicit(entropy, entropylen * 2); diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 2db72121d5680..01caab5e65dec 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -107,6 +107,7 @@ struct drbg_test_data { enum drbg_seed_state { DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ DRBG_SEED_STATE_FULL, }; -- GitLab From da208708f4c549995118c20add7e19c25b2215c5 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 2 Jun 2022 22:23:26 +0200 Subject: [PATCH 0301/4335] crypto: drbg - move dynamic ->reseed_threshold adjustments to __drbg_seed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 262d83a4290c331cd4f617a457408bdb82fbb738 upstream. Since commit 42ea507fae1a ("crypto: drbg - reseed often if seedsource is degraded"), the maximum seed lifetime represented by ->reseed_threshold gets temporarily lowered if the get_random_bytes() source cannot provide sufficient entropy yet, as is common during boot, and restored back to the original value again once that has changed. More specifically, if the add_random_ready_callback() invoked from drbg_prepare_hrng() in the course of DRBG instantiation does not return -EALREADY, that is, if get_random_bytes() has not been fully initialized at this point yet, drbg_prepare_hrng() will lower ->reseed_threshold to a value of 50. The drbg_async_seed() scheduled from said random_ready_callback will eventually restore the original value. A future patch will replace the random_ready_callback based notification mechanism and thus, there will be no add_random_ready_callback() return value anymore which could get compared to -EALREADY. However, there's __drbg_seed() which gets invoked in the course of both, the DRBG instantiation as well as the eventual reseeding from get_random_bytes() in aforementioned drbg_async_seed(), if any. Moreover, it knows about the get_random_bytes() initialization state by the time the seed data had been obtained from it: the new_seed_state argument introduced with the previous patch would get set to DRBG_SEED_STATE_PARTIAL in case get_random_bytes() had not been fully initialized yet and to DRBG_SEED_STATE_FULL otherwise. Thus, __drbg_seed() provides a convenient alternative for managing that ->reseed_threshold lowering and restoring at a central place. Move all ->reseed_threshold adjustment code from drbg_prepare_hrng() and drbg_async_seed() respectively to __drbg_seed(). Make __drbg_seed() lower the ->reseed_threshold to 50 in case its new_seed_state argument equals DRBG_SEED_STATE_PARTIAL and let it restore the original value otherwise. There is no change in behaviour. Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/drbg.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index bc5a533c09601..84b489fd42518 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1047,6 +1047,27 @@ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; + switch (drbg->seeded) { + case DRBG_SEED_STATE_UNSEEDED: + /* Impossible, but handle it to silence compiler warnings. */ + fallthrough; + case DRBG_SEED_STATE_PARTIAL: + /* + * Require frequent reseeds until the seed source is + * fully initialized. + */ + drbg->reseed_threshold = 50; + break; + + case DRBG_SEED_STATE_FULL: + /* + * Seed source has become fully initialized, frequent + * reseeds no longer required. + */ + drbg->reseed_threshold = drbg_max_requests(drbg); + break; + } + return ret; } @@ -1095,9 +1116,6 @@ static void drbg_async_seed(struct work_struct *work) __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); - if (drbg->seeded == DRBG_SEED_STATE_FULL) - drbg->reseed_threshold = drbg_max_requests(drbg); - unlock: mutex_unlock(&drbg->drbg_mutex); @@ -1533,12 +1551,6 @@ static int drbg_prepare_hrng(struct drbg_state *drbg) return err; } - /* - * Require frequent reseeds until the seed source is fully - * initialized. - */ - drbg->reseed_threshold = 50; - return err; } -- GitLab From e61717947af57fd6751e1e79d4179f561dfc6149 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 2 Jun 2022 22:23:27 +0200 Subject: [PATCH 0302/4335] crypto: drbg - make reseeding from get_random_bytes() synchronous commit 074bcd4000e0d812bc253f86fedc40f81ed59ccc upstream. get_random_bytes() usually hasn't full entropy available by the time DRBG instances are first getting seeded from it during boot. Thus, the DRBG implementation registers random_ready_callbacks which would in turn schedule some work for reseeding the DRBGs once get_random_bytes() has sufficient entropy available. For reference, the relevant history around handling DRBG (re)seeding in the context of a not yet fully seeded get_random_bytes() is: commit 16b369a91d0d ("random: Blocking API for accessing nonblocking_pool") commit 4c7879907edd ("crypto: drbg - add async seeding operation") commit 205a525c3342 ("random: Add callback API for random pool readiness") commit 57225e679788 ("crypto: drbg - Use callback API for random readiness") commit c2719503f5e1 ("random: Remove kernel blocking API") However, some time later, the initialization state of get_random_bytes() has been made queryable via rng_is_initialized() introduced with commit 9a47249d444d ("random: Make crng state queryable"). This primitive now allows for streamlining the DRBG reseeding from get_random_bytes() by replacing that aforementioned asynchronous work scheduling from random_ready_callbacks with some simpler, synchronous code in drbg_generate() next to the related logic already present therein. Apart from improving overall code readability, this change will also enable DRBG users to rely on wait_for_random_bytes() for ensuring that the initial seeding has completed, if desired. The previous patches already laid the grounds by making drbg_seed() to record at each DRBG instance whether it was being seeded at a time when rng_is_initialized() still had been false as indicated by ->seeded == DRBG_SEED_STATE_PARTIAL. All that remains to be done now is to make drbg_generate() check for this condition, determine whether rng_is_initialized() has flipped to true in the meanwhile and invoke a reseed from get_random_bytes() if so. Make this move: - rename the former drbg_async_seed() work handler, i.e. the one in charge of reseeding a DRBG instance from get_random_bytes(), to "drbg_seed_from_random()", - change its signature as appropriate, i.e. make it take a struct drbg_state rather than a work_struct and change its return type from "void" to "int" in order to allow for passing error information from e.g. its __drbg_seed() invocation onwards to callers, - make drbg_generate() invoke this drbg_seed_from_random() once it encounters a DRBG instance with ->seeded == DRBG_SEED_STATE_PARTIAL by the time rng_is_initialized() has flipped to true and - prune everything related to the former, random_ready_callback based mechanism. As drbg_seed_from_random() is now getting invoked from drbg_generate() with the ->drbg_mutex being held, it must not attempt to recursively grab it once again. Remove the corresponding mutex operations from what is now drbg_seed_from_random(). Furthermore, as drbg_seed_from_random() can now report errors directly to its caller, there's no need for it to temporarily switch the DRBG's ->seeded state to DRBG_SEED_STATE_UNSEEDED so that a failure of the subsequently invoked __drbg_seed() will get signaled to drbg_generate(). Don't do it then. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu [Jason: for stable, undid the modifications for the backport of 5acd3548.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- crypto/drbg.c | 61 ++++++++----------------------------------- drivers/char/random.c | 2 -- include/crypto/drbg.h | 2 -- 3 files changed, 11 insertions(+), 54 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 84b489fd42518..761104e93d44a 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1087,12 +1087,10 @@ static inline int drbg_get_random_bytes(struct drbg_state *drbg, return 0; } -static void drbg_async_seed(struct work_struct *work) +static int drbg_seed_from_random(struct drbg_state *drbg) { struct drbg_string data; LIST_HEAD(seedlist); - struct drbg_state *drbg = container_of(work, struct drbg_state, - seed_work); unsigned int entropylen = drbg_sec_strength(drbg->core->flags); unsigned char entropy[32]; int ret; @@ -1103,23 +1101,15 @@ static void drbg_async_seed(struct work_struct *work) drbg_string_fill(&data, entropy, entropylen); list_add_tail(&data.list, &seedlist); - mutex_lock(&drbg->drbg_mutex); - ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) - goto unlock; - - /* Reset ->seeded so that if __drbg_seed fails the next - * generate call will trigger a reseed. - */ - drbg->seeded = DRBG_SEED_STATE_UNSEEDED; - - __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); + goto out; -unlock: - mutex_unlock(&drbg->drbg_mutex); + ret = __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); +out: memzero_explicit(entropy, entropylen); + return ret; } /* @@ -1422,6 +1412,11 @@ static int drbg_generate(struct drbg_state *drbg, goto err; /* 9.3.1 step 7.4 */ addtl = NULL; + } else if (rng_is_initialized() && + drbg->seeded == DRBG_SEED_STATE_PARTIAL) { + len = drbg_seed_from_random(drbg); + if (len) + goto err; } if (addtl && 0 < addtl->len) @@ -1514,44 +1509,15 @@ static int drbg_generate_long(struct drbg_state *drbg, return 0; } -static int drbg_schedule_async_seed(struct notifier_block *nb, unsigned long action, void *data) -{ - struct drbg_state *drbg = container_of(nb, struct drbg_state, - random_ready); - - schedule_work(&drbg->seed_work); - return 0; -} - static int drbg_prepare_hrng(struct drbg_state *drbg) { - int err; - /* We do not need an HRNG in test mode. */ if (list_empty(&drbg->test_data.list)) return 0; drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0); - INIT_WORK(&drbg->seed_work, drbg_async_seed); - - drbg->random_ready.notifier_call = drbg_schedule_async_seed; - err = register_random_ready_notifier(&drbg->random_ready); - - switch (err) { - case 0: - break; - - case -EALREADY: - err = 0; - fallthrough; - - default: - drbg->random_ready.notifier_call = NULL; - return err; - } - - return err; + return 0; } /* @@ -1645,11 +1611,6 @@ free_everything: */ static int drbg_uninstantiate(struct drbg_state *drbg) { - if (drbg->random_ready.notifier_call) { - unregister_random_ready_notifier(&drbg->random_ready); - cancel_work_sync(&drbg->seed_work); - } - if (!IS_ERR_OR_NULL(drbg->jent)) crypto_free_rng(drbg->jent); drbg->jent = NULL; diff --git a/drivers/char/random.c b/drivers/char/random.c index ca17a658c2147..e2f1fce8dc977 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -163,7 +163,6 @@ int __cold register_random_ready_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&random_ready_chain_lock, flags); return ret; } -EXPORT_SYMBOL(register_random_ready_notifier); /* * Delete a previously registered readiness callback function. @@ -178,7 +177,6 @@ int __cold unregister_random_ready_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&random_ready_chain_lock, flags); return ret; } -EXPORT_SYMBOL(unregister_random_ready_notifier); static void __cold process_random_ready_list(void) { diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 01caab5e65dec..a6c3b8e7deb64 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -137,12 +137,10 @@ struct drbg_state { bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ - struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; - struct notifier_block random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) -- GitLab From 89ef50fe03a55feccf5681c237673a2f98161161 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 May 2022 09:56:18 +0200 Subject: [PATCH 0303/4335] netfilter: nf_tables: sanitize nft_set_desc_concat_parse() commit fecf31ee395b0295f2d7260aa29946b7605f7c85 upstream. Add several sanity checks for nft_set_desc_concat_parse(): - validate desc->field_count not larger than desc->field_len array. - field length cannot be larger than desc->field_len (ie. U8_MAX) - total length of the concatenation cannot be larger than register array. Joint work with Florian Westphal. Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") Reported-by: Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 91865dc2bcbb8..ee4edebe6124f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4151,6 +4151,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, u32 len; int err; + if (desc->field_count >= ARRAY_SIZE(desc->field_len)) + return -E2BIG; + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) @@ -4160,9 +4163,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); - - if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) - return -E2BIG; + if (!len || len > U8_MAX) + return -EINVAL; desc->field_len[desc->field_count++] = len; @@ -4173,7 +4175,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *attr; - int rem, err; + u32 num_regs = 0; + int rem, err, i; nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) @@ -4184,6 +4187,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, return err; } + for (i = 0; i < desc->field_count; i++) + num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + + if (num_regs > NFT_REG32_COUNT) + return -E2BIG; + return 0; } -- GitLab From 9ea55b9f43533d30168aae0dfdc5c230b561ef69 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:05 +0200 Subject: [PATCH 0304/4335] netfilter: nf_tables: hold mutex on netns pre_exit path commit 3923b1e4406680d57da7e873da77b1683035d83f upstream. clean_net() runs in workqueue while walking over the lists, grab mutex. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ee4edebe6124f..f274716e024d6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9746,7 +9746,11 @@ static int __net_init nf_tables_init_net(struct net *net) static void __net_exit nf_tables_pre_exit_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) -- GitLab From 9c413a8c8bb49cc16796371805ecb260e885bb2b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:06 +0200 Subject: [PATCH 0305/4335] netfilter: nf_tables: double hook unregistration in netns path commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream. __nft_release_hooks() is called from pre_netns exit path which unregisters the hooks, then the NETDEV_UNREGISTER event is triggered which unregisters the hooks again. [ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 [...] [ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 [ 565.253682] Workqueue: netns cleanup_net [ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 [...] [ 565.297120] Call Trace: [ 565.300900] [ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] [ 565.308518] raw_notifier_call_chain+0x63/0x80 [ 565.312386] unregister_netdevice_many+0x54f/0xb50 Unregister and destroy netdev hook from netns pre_exit via kfree_rcu so the NETDEV_UNREGISTER path see unregistered hooks. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 54 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f274716e024d6..79e8fc687fdd4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -222,12 +222,18 @@ err_register: } static void nft_netdev_unregister_hooks(struct net *net, - struct list_head *hook_list) + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook, rcu); + } + } } static int nf_tables_register_hook(struct net *net, @@ -253,9 +259,10 @@ static int nf_tables_register_hook(struct net *net, return nf_register_net_hook(net, &basechain->ops); } -static void nf_tables_unregister_hook(struct net *net, - const struct nft_table *table, - struct nft_chain *chain) +static void __nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain, + bool release_netdev) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; @@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(struct net *net, return basechain->type->ops_unregister(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list); + nft_netdev_unregister_hooks(net, &basechain->hook_list, + release_netdev); else nf_unregister_net_hook(net, &basechain->ops); } +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) +{ + return __nf_tables_unregister_hook(net, table, chain, false); +} + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -7206,13 +7221,25 @@ static void nft_unregister_flowtable_hook(struct net *net, FLOW_BLOCK_UNBIND); } -static void nft_unregister_flowtable_net_hooks(struct net *net, - struct list_head *hook_list) +static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook); + } + } +} + +static void nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list) +{ + __nft_unregister_flowtable_net_hooks(net, hook_list, false); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -9605,9 +9632,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table) struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); + __nf_tables_unregister_hook(net, table, chain, true); list_for_each_entry(flowtable, &table->flowtables, list) - nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); + __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, + true); } static void __nft_release_hooks(struct net *net) -- GitLab From 01989d7eebb61c99bd4b88ebc8e261bd2f02caed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:04 +0200 Subject: [PATCH 0306/4335] netfilter: conntrack: re-fetch conntrack after insertion commit 56b14ecec97f39118bf85c9ac2438c5a949509ed upstream. In case the conntrack is clashing, insertion can free skb->_nfct and set skb->_nfct to the already-confirmed entry. This wasn't found before because the conntrack entry and the extension space used to free'd after an rcu grace period, plus the race needs events enabled to trigger. Reported-by: Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") Fixes: 2ad9d7747c10 ("netfilter: conntrack: free extension area immediately") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_conntrack_core.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 13807ea94cd2b..2d524782f53b7 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + if (likely(ret == NF_ACCEPT)) nf_ct_deliver_cached_events(ct); } -- GitLab From 1b6bcda5df8cae10228c4332f92f4d2e310310da Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Thu, 14 Apr 2022 14:21:03 +0800 Subject: [PATCH 0307/4335] KVM: PPC: Book3S HV: fix incorrect NULL check on list iterator commit 300981abddcb13f8f06ad58f52358b53a8096775 upstream. The bug is here: if (!p) return ret; The list iterator value 'p' will *always* be set and non-NULL by list_for_each_entry(), so it is incorrect to assume that the iterator value will be NULL if the list is empty or no element is found. To fix the bug, Use a new value 'iter' as the list iterator, while use the old value 'p' as a dedicated variable to point to the found element. Fixes: dfaa973ae960 ("KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Xiaomeng Tong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220414062103.8153-1-xiam0nd.tong@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_uvmem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index a7061ee3b1577..3fbe710ff8390 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -360,13 +360,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, struct kvm *kvm, unsigned long *gfn) { - struct kvmppc_uvmem_slot *p; + struct kvmppc_uvmem_slot *p = NULL, *iter; bool ret = false; unsigned long i; - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) - if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) + if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { + p = iter; break; + } if (!p) return ret; /* -- GitLab From 7b54eb6319342eccd2c95a3e479e10282a5946c8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 19 May 2022 07:57:11 -0700 Subject: [PATCH 0308/4335] x86/kvm: Alloc dummy async #PF token outside of raw spinlock commit 0547758a6de3cc71a0cfdd031a3621a30db6a68b upstream. Drop the raw spinlock in kvm_async_pf_task_wake() before allocating the the dummy async #PF token, the allocator is preemptible on PREEMPT_RT kernels and must not be called from truly atomic contexts. Opportunistically document why it's ok to loop on allocation failure, i.e. why the function won't get stuck in an infinite loop. Reported-by: Yajun Deng Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d36b58e705b6c..4444135e8c210 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -188,7 +188,7 @@ void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node *n; + struct kvm_task_sleep_node *n, *dummy = NULL; if (token == ~0) { apf_task_wake_all(); @@ -200,28 +200,41 @@ again: n = _find_apf_task(b, token); if (!n) { /* - * async PF was not yet handled. - * Add dummy entry for the token. + * Async #PF not yet handled, add a dummy entry for the token. + * Allocating the token must be down outside of the raw lock + * as the allocator is preemptible on PREEMPT_RT kernels. */ - n = kzalloc(sizeof(*n), GFP_ATOMIC); - if (!n) { + if (!dummy) { + raw_spin_unlock(&b->lock); + dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + /* - * Allocation failed! Busy wait while other cpu - * handles async PF. + * Continue looping on allocation failure, eventually + * the async #PF will be handled and allocating a new + * node will be unnecessary. + */ + if (!dummy) + cpu_relax(); + + /* + * Recheck for async #PF completion before enqueueing + * the dummy token to avoid duplicate list entries. */ - raw_spin_unlock(&b->lock); - cpu_relax(); goto again; } - n->token = token; - n->cpu = smp_processor_id(); - init_swait_queue_head(&n->wq); - hlist_add_head(&n->link, &b->list); + dummy->token = token; + dummy->cpu = smp_processor_id(); + init_swait_queue_head(&dummy->wq); + hlist_add_head(&dummy->link, &b->list); + dummy = NULL; } else { apf_task_wake_one(n); } raw_spin_unlock(&b->lock); - return; + + /* A dummy token might be allocated and ultimately not used. */ + if (dummy) + kfree(dummy); } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); -- GitLab From eea9755a04e042bfd15566d7524aadcb741c4a2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 24 May 2022 09:43:31 -0400 Subject: [PATCH 0309/4335] x86, kvm: use correct GFP flags for preemption disabled commit baec4f5a018fe2d708fc1022330dba04b38b5fe3 upstream. Commit ddd7ed842627 ("x86/kvm: Alloc dummy async #PF token outside of raw spinlock") leads to the following Smatch static checker warning: arch/x86/kernel/kvm.c:212 kvm_async_pf_task_wake() warn: sleeping in atomic context arch/x86/kernel/kvm.c 202 raw_spin_lock(&b->lock); 203 n = _find_apf_task(b, token); 204 if (!n) { 205 /* 206 * Async #PF not yet handled, add a dummy entry for the token. 207 * Allocating the token must be down outside of the raw lock 208 * as the allocator is preemptible on PREEMPT_RT kernels. 209 */ 210 if (!dummy) { 211 raw_spin_unlock(&b->lock); --> 212 dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ^^^^^^^^^^ Smatch thinks the caller has preempt disabled. The `smdb.py preempt kvm_async_pf_task_wake` output call tree is: sysvec_kvm_asyncpf_interrupt() <- disables preempt -> __sysvec_kvm_asyncpf_interrupt() -> kvm_async_pf_task_wake() The caller is this: arch/x86/kernel/kvm.c 290 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) 291 { 292 struct pt_regs *old_regs = set_irq_regs(regs); 293 u32 token; 294 295 ack_APIC_irq(); 296 297 inc_irq_stat(irq_hv_callback_count); 298 299 if (__this_cpu_read(apf_reason.enabled)) { 300 token = __this_cpu_read(apf_reason.token); 301 kvm_async_pf_task_wake(token); 302 __this_cpu_write(apf_reason.token, 0); 303 wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); 304 } 305 306 set_irq_regs(old_regs); 307 } The DEFINE_IDTENTRY_SYSVEC() is a wrapper that calls this function from the call_on_irqstack_cond(). It's inside the call_on_irqstack_cond() where preempt is disabled (unless it's already disabled). The irq_enter/exit_rcu() functions disable/enable preempt. Reported-by: Dan Carpenter Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4444135e8c210..9e3af56747e8f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -206,7 +206,7 @@ again: */ if (!dummy) { raw_spin_unlock(&b->lock); - dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); /* * Continue looping on allocation failure, eventually -- GitLab From 531d1070d864c78283b7597449e60ddc53319d88 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Mar 2022 03:27:41 +0000 Subject: [PATCH 0310/4335] KVM: x86: avoid calling x86 emulator without a decoded instruction commit fee060cd52d69c114b62d1a2948ea9648b5131f9 upstream. Whenever x86_decode_emulated_instruction() detects a breakpoint, it returns the value that kvm_vcpu_check_breakpoint() writes into its pass-by-reference second argument. Unfortunately this is completely bogus because the expected outcome of x86_decode_emulated_instruction is an EMULATION_* value. Then, if kvm_vcpu_check_breakpoint() does "*r = 0" (corresponding to a KVM_EXIT_DEBUG userspace exit), it is misunderstood as EMULATION_OK and x86_emulate_instruction() is called without having decoded the instruction. This causes various havoc from running with a stale emulation context. The fix is to move the call to kvm_vcpu_check_breakpoint() where it was before commit 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction emulation with decoding") introduced x86_decode_emulated_instruction(). The other caller of the function does not need breakpoint checks, because it is invoked as part of a vmexit and the processor has already checked those before executing the instruction that #GP'd. This fixes CVE-2022-1852. Reported-by: Qiuhao Li Reported-by: Gaoning Pan Reported-by: Yongkang Jia Fixes: 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction emulation with decoding") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220311032801.3467418-2-seanjc@google.com> [Rewrote commit message according to Qiuhao's report, since a patch already existed to fix the bug. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75da9c0d5ae37..23905ba3058ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7846,7 +7846,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); -static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) +static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) { if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { @@ -7915,25 +7915,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) } /* - * Decode to be emulated instruction. Return EMULATION_OK if success. + * Decode an instruction for emulation. The caller is responsible for handling + * code breakpoints. Note, manually detecting code breakpoints is unnecessary + * (and wrong) when emulating on an intercepted fault-like exception[*], as + * code breakpoints have higher priority and thus have already been done by + * hardware. + * + * [*] Except #MC, which is higher priority, but KVM should never emulate in + * response to a machine check. */ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, void *insn, int insn_len) { - int r = EMULATION_OK; struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + int r; init_emulate_ctxt(vcpu); - /* - * We will reenter on the same instruction since we do not set - * complete_userspace_io. This does not handle watchpoints yet, - * those would be handled in the emulate_ops. - */ - if (!(emulation_type & EMULTYPE_SKIP) && - kvm_vcpu_check_breakpoint(vcpu, &r)) - return r; - r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); trace_kvm_emulate_insn_start(vcpu); @@ -7966,6 +7964,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!(emulation_type & EMULTYPE_NO_DECODE)) { kvm_clear_exception_queue(vcpu); + /* + * Return immediately if RIP hits a code breakpoint, such #DBs + * are fault-like and are higher priority than any faults on + * the code fetch itself. + */ + if (!(emulation_type & EMULTYPE_SKIP) && + kvm_vcpu_check_code_breakpoint(vcpu, &r)) + return r; + r = x86_decode_emulated_instruction(vcpu, emulation_type, insn, insn_len); if (r != EMULATION_OK) { -- GitLab From 8d3a2aa0976f57320ba89baf9d57fb158dd0cd0d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Apr 2022 00:23:13 +0000 Subject: [PATCH 0311/4335] KVM: x86: Drop WARNs that assert a triple fault never "escapes" from L2 commit 45846661d10422ce9e22da21f8277540b29eca22 upstream. Remove WARNs that sanity check that KVM never lets a triple fault for L2 escape and incorrectly end up in L1. In normal operation, the sanity check is perfectly valid, but it incorrectly assumes that it's impossible for userspace to induce KVM_REQ_TRIPLE_FAULT without bouncing through KVM_RUN (which guarantees kvm_check_nested_state() will see and handle the triple fault). The WARN can currently be triggered if userspace injects a machine check while L2 is active and CR4.MCE=0. And a future fix to allow save/restore of KVM_REQ_TRIPLE_FAULT, e.g. so that a synthesized triple fault isn't lost on migration, will make it trivially easy for userspace to trigger the WARN. Clearing KVM_REQ_TRIPLE_FAULT when forcibly leaving guest mode is tempting, but wrong, especially if/when the request is saved/restored, e.g. if userspace restores events (including a triple fault) and then restores nested state (which may forcibly leave guest mode). Ignoring the fact that KVM doesn't currently provide the necessary APIs, it's userspace's responsibility to manage pending events during save/restore. ------------[ cut here ]------------ WARNING: CPU: 7 PID: 1399 at arch/x86/kvm/vmx/nested.c:4522 nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] Modules linked in: kvm_intel kvm irqbypass CPU: 7 PID: 1399 Comm: state_test Not tainted 5.17.0-rc3+ #808 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] Call Trace: vmx_leave_nested+0x30/0x40 [kvm_intel] vmx_set_nested_state+0xca/0x3e0 [kvm_intel] kvm_arch_vcpu_ioctl+0xf49/0x13e0 [kvm] kvm_vcpu_ioctl+0x4b9/0x660 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae ---[ end trace 0000000000000000 ]--- Fixes: cb6a32c2b877 ("KVM: x86: Handle triple fault in L2 without killing L1") Cc: stable@vger.kernel.org Cc: Chenyi Qiang Signed-off-by: Sean Christopherson Message-Id: <20220407002315.78092-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 3 --- arch/x86/kvm/vmx/nested.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 556e7a3f35627..993daa6fb1287 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -750,9 +750,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct kvm_host_map map; int rc; - /* Triple faults in L2 should never escape. */ - WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); if (rc) { if (rc == -EINVAL) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1546a10ecb564..5eae69c8123b2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4501,9 +4501,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); - /* Similarly, triple faults in L2 should never escape. */ - WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { /* * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map -- GitLab From d8fdb4b24097472ff6b3c0559448200d420b1418 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 16 May 2022 15:43:10 +0000 Subject: [PATCH 0312/4335] KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak commit d22d2474e3953996f03528b84b7f52cc26a39403 upstream. For some sev ioctl interfaces, the length parameter that is passed maybe less than or equal to SEV_FW_BLOB_MAX_SIZE, but larger than the data that PSP firmware returns. In this case, kmalloc will allocate memory that is the size of the input rather than the size of the data. Since PSP firmware doesn't fully overwrite the allocated buffer, these sev ioctl interface may return uninitialized kernel slab memory. Reported-by: Andy Nguyen Suggested-by: David Rientjes Suggested-by: Peter Gonda Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Fixes: eaf78265a4ab3 ("KVM: SVM: Move SEV code to separate file") Fixes: 2c07ded06427d ("KVM: SVM: add support for SEV attestation command") Fixes: 4cfdd47d6d95a ("KVM: SVM: Add KVM_SEV SEND_START command") Fixes: d3d1af85e2c75 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") Fixes: eba04b20e4861 ("KVM: x86: Account a variety of miscellaneous allocations") Signed-off-by: Ashish Kalra Reviewed-by: Peter Gonda Message-Id: <20220516154310.3685678-1-Ashish.Kalra@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/sev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c8c3212250618..4a4dc105552e3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -676,7 +676,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; - blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); + blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); if (!blob) return -ENOMEM; @@ -796,7 +796,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED(dst_paddr, 16) || !IS_ALIGNED(paddr, 16) || !IS_ALIGNED(size, 16)) { - tpage = (void *)alloc_page(GFP_KERNEL); + tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); if (!tpage) return -ENOMEM; @@ -1082,7 +1082,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; - blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); + blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); if (!blob) return -ENOMEM; @@ -1164,7 +1164,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) return -EINVAL; /* allocate the memory to hold the session data blob */ - session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT); + session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); if (!session_data) return -ENOMEM; @@ -1288,11 +1288,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) /* allocate memory for header and transport buffer */ ret = -ENOMEM; - hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); + hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); if (!hdr) goto e_unpin; - trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT); + trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); if (!trans_data) goto e_free_hdr; -- GitLab From dd36037d4ae7d66a58ce0ee5ed414d185f9d8652 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 Apr 2022 09:06:01 -0300 Subject: [PATCH 0313/4335] crypto: caam - fix i.MX6SX entropy delay value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ee4cdad368a26de3967f2975806a9ee2fa245df upstream. Since commit 358ba762d9f1 ("crypto: caam - enable prediction resistance in HRWNG") the following CAAM errors can be seen on i.MX6SX: caam_jr 2101000.jr: 20003c5b: CCB: desc idx 60: RNG: Hardware error hwrng: no data available This error is due to an incorrect entropy delay for i.MX6SX. Fix it by increasing the minimum entropy delay for i.MX6SX as done in U-Boot: https://patchwork.ozlabs.org/project/uboot/patch/20220415111049.2565744-1-gaurav.jain@nxp.com/ As explained in the U-Boot patch: "RNG self tests are run to determine the correct entropy delay. Such tests are executed with different voltages and temperatures to identify the worst case value for the entropy delay. For i.MX6SX, it was determined that after adding a margin value of 1000 the minimum entropy delay should be at least 12000." Cc: Fixes: 358ba762d9f1 ("crypto: caam - enable prediction resistance in HRWNG") Signed-off-by: Fabio Estevam Reviewed-by: Horia Geantă Reviewed-by: Vabhav Sharma Reviewed-by: Gaurav Jain Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/ctrl.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index ca0361b2dbb07..f87aa2169e5f5 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major, } #endif +static bool needs_entropy_delay_adjustment(void) +{ + if (of_machine_is_compatible("fsl,imx6sx")) + return true; + return false; +} + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -855,6 +862,8 @@ static int caam_probe(struct platform_device *pdev) * Also, if a handle was instantiated, do not change * the TRNG parameters. */ + if (needs_entropy_delay_adjustment()) + ent_delay = 12000; if (!(ctrlpriv->rng4_sh_init || inst_handles)) { dev_info(dev, "Entropy delay = %u\n", @@ -871,6 +880,15 @@ static int caam_probe(struct platform_device *pdev) */ ret = instantiate_rng(dev, inst_handles, gen_sk); + /* + * Entropy delay is determined via TRNG characterization. + * TRNG characterization is run across different voltages + * and temperatures. + * If worst case value for ent_dly is identified, + * the loop can be skipped for that platform. + */ + if (needs_entropy_delay_adjustment()) + break; if (ret == -EAGAIN) /* * if here, the loop will rerun, -- GitLab From 5763176f695432bcfde726f0e9bfda1fdb7d4b04 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Thu, 21 Apr 2022 20:25:10 +0300 Subject: [PATCH 0314/4335] crypto: ecrdsa - Fix incorrect use of vli_cmp commit 7cc7ab73f83ee6d50dc9536bc3355495d8600fad upstream. Correctly compare values that shall be greater-or-equal and not just greater. Fixes: 0d7a78643f69 ("crypto: ecrdsa - add EC-RDSA (GOST 34.10) algorithm") Cc: Signed-off-by: Vitaly Chikunov Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ecrdsa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index 6a3fd09057d0c..f7ed430206720 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 1: verify that 0 < r < q, 0 < s < q */ if (vli_is_zero(r, ndigits) || - vli_cmp(r, ctx->curve->n, ndigits) == 1 || + vli_cmp(r, ctx->curve->n, ndigits) >= 0 || vli_is_zero(s, ndigits) || - vli_cmp(s, ctx->curve->n, ndigits) == 1) + vli_cmp(s, ctx->curve->n, ndigits) >= 0) return -EKEYREJECTED; /* Step 2: calculate hash (h) of the message (passed as input) */ /* Step 3: calculate e = h \mod q */ vli_from_le64(e, digest, ndigits); - if (vli_cmp(e, ctx->curve->n, ndigits) == 1) + if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) vli_sub(e, e, ctx->curve->n, ndigits); if (vli_is_zero(e, ndigits)) e[0] = 1; @@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, ctx->curve); - if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) + if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); /* Step 7: if R == r signature is valid */ -- GitLab From 3ec459c8810e658401be428d3168eacfc380bdd0 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 13 May 2022 15:11:26 -0700 Subject: [PATCH 0315/4335] zsmalloc: fix races between asynchronous zspage free and page migration commit 2505a981114dcb715f8977b8433f7540854851d8 upstream. The asynchronous zspage free worker tries to lock a zspage's entire page list without defending against page migration. Since pages which haven't yet been locked can concurrently migrate off the zspage page list while lock_zspage() churns away, lock_zspage() can suffer from a few different lethal races. It can lock a page which no longer belongs to the zspage and unsafely dereference page_private(), it can unsafely dereference a torn pointer to the next page (since there's a data race), and it can observe a spurious NULL pointer to the next page and thus not lock all of the zspage's pages (since a single page migration will reconstruct the entire page list, and create_page_chain() unconditionally zeroes out each list pointer in the process). Fix the races by using migrate_read_lock() in lock_zspage() to synchronize with page migration. Link: https://lkml.kernel.org/r/20220509024703.243847-1-sultan@kerneltoast.com Fixes: 77ff465799c602 ("zsmalloc: zs_page_migrate: skip unnecessary loops but not return -EBUSY if zspage is not inuse") Signed-off-by: Sultan Alsawaf Acked-by: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b897ce3b399a1..439deb8decbcc 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1743,11 +1743,40 @@ static enum fullness_group putback_zspage(struct size_class *class, */ static void lock_zspage(struct zspage *zspage) { - struct page *page = get_first_page(zspage); + struct page *curr_page, *page; - do { - lock_page(page); - } while ((page = get_next_page(page)) != NULL); + /* + * Pages we haven't locked yet can be migrated off the list while we're + * trying to lock them, so we need to be careful and only attempt to + * lock each page under migrate_read_lock(). Otherwise, the page we lock + * may no longer belong to the zspage. This means that we may wait for + * the wrong page to unlock, so we must take a reference to the page + * prior to waiting for it to unlock outside migrate_read_lock(). + */ + while (1) { + migrate_read_lock(zspage); + page = get_first_page(zspage); + if (trylock_page(page)) + break; + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + } + + curr_page = page; + while ((page = get_next_page(curr_page))) { + if (trylock_page(page)) { + curr_page = page; + } else { + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + migrate_read_lock(zspage); + } + } + migrate_read_unlock(zspage); } static int zs_init_fs_context(struct fs_context *fc) -- GitLab From 8c9a54eed73880c56e5d5697b419399522495910 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 21 May 2022 08:46:27 +0200 Subject: [PATCH 0316/4335] ALSA: usb-audio: Workaround for clock setup on TEAC devices commit 5ce0b06ae5e69e23142e73c5c3c0260e9f2ccb4b upstream. Maris reported that TEAC UD-501 (0644:8043) doesn't work with the typical "clock source 41 is not valid, cannot use" errors on the recent kernels. The currently known workaround so far is to restore (partially) what we've done unconditionally at the clock setup; namely, re-setup the USB interface immediately after the clock is changed. This patch re-introduces the behavior conditionally for TEAC devices. Further notes: - The USB interface shall be set later in snd_usb_endpoint_configure(), but this seems to be too late. - Even calling usb_set_interface() right after sne_usb_init_sample_rate() doesn't help; so this must be related with the clock validation, too. - The device may still spew the "clock source 41 is not valid" error at the first clock setup. This seems happening at the very first try of clock setup, but it disappears at later attempts. The error is likely harmless because the driver retries the clock setup (such an error is more or less expected on some devices). Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Reported-and-tested-by: Maris Abele Cc: Link: https://lore.kernel.org/r/20220521064627.29292-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/clock.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 98345a695dccb..a87aec3ac242e 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -572,6 +572,13 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, /* continue processing */ } + /* FIXME - TEAC devices require the immediate interface setup */ + if (rate != prev_rate && USB_ID_VENDOR(chip->usb_id) == 0x0644) { + usb_set_interface(chip->dev, fmt->iface, fmt->altsetting); + if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) + msleep(50); + } + validation: /* validate clock after rate change */ if (!uac_clock_source_is_valid(chip, fmt, clock)) -- GitLab From 8d1f71573089a85db744e14b9daccb1333e19fdc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 21 May 2022 08:53:25 +0200 Subject: [PATCH 0317/4335] ALSA: usb-audio: Add missing ep_idx in fixed EP quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7b0efea4baf02f5e2f89e5f9b75ef891571b45f1 upstream. The quirk entry for Focusrite Saffire 6 had no proper ep_idx for the capture endpoint, and this confused the driver, resulting in the broken sound. This patch adds the missing ep_idx in the entry. While we are at it, a couple of other entries (for Digidesign MBox and MOTU MicroBook II) seem to have the same problem, and those are covered as well. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Reported-by: André Kapelrud Cc: Link: https://lore.kernel.org/r/20220521065325.426-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 40a5e3eb4ef26..78eb41b621d63 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2672,6 +2672,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC, .datainterval = 1, .maxpacksize = 0x0126, @@ -2875,6 +2876,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0x4, .endpoint = 0x81, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, .maxpacksize = 0x130, @@ -3391,6 +3393,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0, .endpoint = 0x03, + .ep_idx = 1, .rates = SNDRV_PCM_RATE_96000, .ep_attr = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, -- GitLab From f18aa2fc00bf1794fb2a92169b7fc6575c4cf289 Mon Sep 17 00:00:00 2001 From: Craig McLure Date: Tue, 24 May 2022 08:21:15 +0200 Subject: [PATCH 0318/4335] ALSA: usb-audio: Configure sync endpoints before data commit 0e85a22d01dfe9ad9a9d9e87cd4a88acce1aad65 upstream. Devices such as the TC-Helicon GoXLR require the sync endpoint to be configured in advance of the data endpoint in order for sound output to work. This patch simply changes the ordering of EP configuration to resolve this. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215079 Signed-off-by: Craig McLure Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20220524062115.25968-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 729e26f5ac4c7..9e65a42cc9b77 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -439,16 +439,21 @@ static int configure_endpoints(struct snd_usb_audio *chip, /* stop any running stream beforehand */ if (stop_endpoints(subs, false)) sync_pending_stops(subs); + if (subs->sync_endpoint) { + err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); + if (err < 0) + return err; + } err = snd_usb_endpoint_configure(chip, subs->data_endpoint); if (err < 0) return err; snd_usb_set_format_quirk(subs, subs->cur_audiofmt); - } - - if (subs->sync_endpoint) { - err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); - if (err < 0) - return err; + } else { + if (subs->sync_endpoint) { + err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); + if (err < 0) + return err; + } } return 0; -- GitLab From db03727b4bbbbb36e6ef4cb655c670eefb6448e9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Apr 2022 10:02:00 -0400 Subject: [PATCH 0319/4335] Bluetooth: hci_qca: Use del_timer_sync() before freeing commit 72ef98445aca568a81c2da050532500a8345ad3a upstream. While looking at a crash report on a timer list being corrupted, which usually happens when a timer is freed while still active. This is commonly triggered by code calling del_timer() instead of del_timer_sync() just before freeing. One possible culprit is the hci_qca driver, which does exactly that. Eric mentioned that wake_retrans_timer could be rearmed via the work queue, so also move the destruction of the work queue before del_timer_sync(). Cc: Eric Dumazet Cc: stable@vger.kernel.org Fixes: 0ff252c1976da ("Bluetooth: hciuart: Add support QCA chipset for UART") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8eb7fddfb9300..ed91af4319b5b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -696,9 +696,9 @@ static int qca_close(struct hci_uart *hu) skb_queue_purge(&qca->tx_wait_q); skb_queue_purge(&qca->txq); skb_queue_purge(&qca->rx_memdump_q); - del_timer(&qca->tx_idle_timer); - del_timer(&qca->wake_retrans_timer); destroy_workqueue(qca->workqueue); + del_timer_sync(&qca->tx_idle_timer); + del_timer_sync(&qca->wake_retrans_timer); qca->hu = NULL; kfree_skb(qca->rx_skb); -- GitLab From a485b32de799e0ff1ebc7f7a2b78fb7a7df4d1d3 Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Sun, 27 Mar 2022 11:08:51 -0700 Subject: [PATCH 0320/4335] ARM: dts: s5pv210: Correct interrupt name for bluetooth in Aries commit 3f5e3d3a8b895c8a11da8b0063ba2022dd9e2045 upstream. Correct the name of the bluetooth interrupt from host-wake to host-wakeup. Fixes: 1c65b6184441b ("ARM: dts: s5pv210: Correct BCM4329 bluetooth node") Cc: Signed-off-by: Jonathan Bakker Link: https://lore.kernel.org/r/CY4PR04MB0567495CFCBDC8D408D44199CB1C9@CY4PR04MB0567.namprd04.prod.outlook.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/s5pv210-aries.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index 160f8cd9a68da..2f57100a011a3 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -895,7 +895,7 @@ device-wakeup-gpios = <&gpg3 4 GPIO_ACTIVE_HIGH>; interrupt-parent = <&gph2>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "host-wake"; + interrupt-names = "host-wakeup"; }; }; -- GitLab From ed0712361a917f57b6dbd4311d4b5d7223900b81 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 Apr 2022 14:56:48 +0300 Subject: [PATCH 0321/4335] dm integrity: fix error code in dm_integrity_ctr() commit d3f2a14b8906df913cb04a706367b012db94a6e8 upstream. The "r" variable shadows an earlier "r" that has function scope. It means that we accidentally return success instead of an error code. Smatch has a warning for this: drivers/md/dm-integrity.c:4503 dm_integrity_ctr() warn: missing error code 'r' Fixes: 7eada909bfd7 ("dm: add integrity target") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e2a51c184a254..d5b8270869620 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4478,8 +4478,6 @@ try_smaller_buffer: } if (should_write_sb) { - int r; - init_journal(ic, 0, ic->journal_sections, 0); r = dm_integrity_failed(ic); if (unlikely(r)) { -- GitLab From fd77cb62207431d6dff0f17315ad169b3df4d9ef Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 25 Apr 2022 08:53:29 -0400 Subject: [PATCH 0322/4335] dm crypt: make printing of the key constant-time commit 567dd8f34560fa221a6343729474536aa7ede4fd upstream. The device mapper dm-crypt target is using scnprintf("%02x", cc->key[i]) to report the current key to userspace. However, this is not a constant-time operation and it may leak information about the key via timing, via cache access patterns or via the branch predictor. Change dm-crypt's key printing to use "%c" instead of "%02x". Also introduce hex2asc() that carefully avoids any branching or memory accesses when converting a number in the range 0 ... 15 to an ascii character. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Tested-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 154139bf7d22b..f30fd38c3773b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3435,6 +3435,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +static char hex2asc(unsigned char c) +{ + return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27); +} + static void crypt_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -3453,9 +3458,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type, if (cc->key_size > 0) { if (cc->key_string) DMEMIT(":%u:%s", cc->key_size, cc->key_string); - else - for (i = 0; i < cc->key_size; i++) - DMEMIT("%02x", cc->key[i]); + else { + for (i = 0; i < cc->key_size; i++) { + DMEMIT("%c%c", hex2asc(cc->key[i] >> 4), + hex2asc(cc->key[i] & 0xf)); + } + } } else DMEMIT("-"); -- GitLab From 40aaeb41dde09939ff41ccb2e569c3d8e46c6b3a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 24 Apr 2022 16:43:00 -0400 Subject: [PATCH 0323/4335] dm stats: add cond_resched when looping over entries commit bfe2b0146c4d0230b68f5c71a64380ff8d361f8b upstream. dm-stats can be used with a very large number of entries (it is only limited by 1/4 of total system memory), so add rescheduling points to the loops that iterate over the entries. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-stats.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 0e039a8c0bf2e..a3f2050b9c9b4 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -225,6 +225,7 @@ void dm_stats_cleanup(struct dm_stats *stats) atomic_read(&shared->in_flight[READ]), atomic_read(&shared->in_flight[WRITE])); } + cond_resched(); } dm_stat_free(&s->rcu_head); } @@ -330,6 +331,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { atomic_set(&s->stat_shared[ni].in_flight[READ], 0); atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0); + cond_resched(); } if (s->n_histogram_entries) { @@ -342,6 +344,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { s->stat_shared[ni].tmp.histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } @@ -362,6 +365,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { p[ni].histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } } @@ -497,6 +501,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program, } DMEMIT("\n"); } + cond_resched(); } mutex_unlock(&stats->mutex); @@ -774,6 +779,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end, local_irq_enable(); } } + cond_resched(); } } @@ -889,6 +895,8 @@ static int dm_stats_print(struct dm_stats *stats, int id, if (unlikely(sz + 1 >= maxlen)) goto buffer_overflow; + + cond_resched(); } if (clear) -- GitLab From 69712b170237ec5979f168149cd31e851a465853 Mon Sep 17 00:00:00 2001 From: Sarthak Kukreti Date: Tue, 31 May 2022 15:56:40 -0400 Subject: [PATCH 0324/4335] dm verity: set DM_TARGET_IMMUTABLE feature flag commit 4caae58406f8ceb741603eee460d79bacca9b1b5 upstream. The device-mapper framework provides a mechanism to mark targets as immutable (and hence fail table reloads that try to change the target type). Add the DM_TARGET_IMMUTABLE flag to the dm-verity target's feature flags to prevent switching the verity target with a different target type. Fixes: a4ffc152198e ("dm: add verity target") Cc: stable@vger.kernel.org Signed-off-by: Sarthak Kukreti Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 88288c8d6bc8c..426299ceb33d7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1312,6 +1312,7 @@ bad: static struct target_type verity_target = { .name = "verity", + .features = DM_TARGET_IMMUTABLE, .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = verity_ctr, -- GitLab From 6e5bc6f7fef0f3e725b801a510b205ebc6f245d8 Mon Sep 17 00:00:00 2001 From: Mariusz Tkaczyk Date: Tue, 22 Mar 2022 16:23:39 +0100 Subject: [PATCH 0325/4335] raid5: introduce MD_BROKEN commit 57668f0a4cc4083a120cc8c517ca0055c4543b59 upstream. Raid456 module had allowed to achieve failed state. It was fixed by fb73b357fb9 ("raid5: block failing device if raid will be failed"). This fix introduces a bug, now if raid5 fails during IO, it may result with a hung task without completion. Faulty flag on the device is necessary to process all requests and is checked many times, mainly in analyze_stripe(). Allow to set faulty on drive again and set MD_BROKEN if raid is failed. As a result, this level is allowed to achieve failed state again, but communication with userspace (via -EBUSY status) will be preserved. This restores possibility to fail array via #mdadm --set-faulty command and will be fixed by additional verification on mdadm side. Reproduction steps: mdadm -CR imsm -e imsm -n 3 /dev/nvme[0-2]n1 mdadm -CR r5 -e imsm -l5 -n3 /dev/nvme[0-2]n1 --assume-clean mkfs.xfs /dev/md126 -f mount /dev/md126 /mnt/root/ fio --filename=/mnt/root/file --size=5GB --direct=1 --rw=randrw --bs=64k --ioengine=libaio --iodepth=64 --runtime=240 --numjobs=4 --time_based --group_reporting --name=throughput-test-job --eta-newline=1 & echo 1 > /sys/block/nvme2n1/device/device/remove echo 1 > /sys/block/nvme1n1/device/device/remove [ 1475.787779] Call Trace: [ 1475.793111] __schedule+0x2a6/0x700 [ 1475.799460] schedule+0x38/0xa0 [ 1475.805454] raid5_get_active_stripe+0x469/0x5f0 [raid456] [ 1475.813856] ? finish_wait+0x80/0x80 [ 1475.820332] raid5_make_request+0x180/0xb40 [raid456] [ 1475.828281] ? finish_wait+0x80/0x80 [ 1475.834727] ? finish_wait+0x80/0x80 [ 1475.841127] ? finish_wait+0x80/0x80 [ 1475.847480] md_handle_request+0x119/0x190 [ 1475.854390] md_make_request+0x8a/0x190 [ 1475.861041] generic_make_request+0xcf/0x310 [ 1475.868145] submit_bio+0x3c/0x160 [ 1475.874355] iomap_dio_submit_bio.isra.20+0x51/0x60 [ 1475.882070] iomap_dio_bio_actor+0x175/0x390 [ 1475.889149] iomap_apply+0xff/0x310 [ 1475.895447] ? iomap_dio_bio_actor+0x390/0x390 [ 1475.902736] ? iomap_dio_bio_actor+0x390/0x390 [ 1475.909974] iomap_dio_rw+0x2f2/0x490 [ 1475.916415] ? iomap_dio_bio_actor+0x390/0x390 [ 1475.923680] ? atime_needs_update+0x77/0xe0 [ 1475.930674] ? xfs_file_dio_aio_read+0x6b/0xe0 [xfs] [ 1475.938455] xfs_file_dio_aio_read+0x6b/0xe0 [xfs] [ 1475.946084] xfs_file_read_iter+0xba/0xd0 [xfs] [ 1475.953403] aio_read+0xd5/0x180 [ 1475.959395] ? _cond_resched+0x15/0x30 [ 1475.965907] io_submit_one+0x20b/0x3c0 [ 1475.972398] __x64_sys_io_submit+0xa2/0x180 [ 1475.979335] ? do_io_getevents+0x7c/0xc0 [ 1475.986009] do_syscall_64+0x5b/0x1a0 [ 1475.992419] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 1476.000255] RIP: 0033:0x7f11fc27978d [ 1476.006631] Code: Bad RIP value. [ 1476.073251] INFO: task fio:3877 blocked for more than 120 seconds. Cc: stable@vger.kernel.org Fixes: fb73b357fb9 ("raid5: block failing device if raid will be failed") Reviewd-by: Xiao Ni Signed-off-by: Mariusz Tkaczyk Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 47 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b9d062f0a02b2..e54d802ee0bb8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -686,17 +686,17 @@ int raid5_calc_degraded(struct r5conf *conf) return degraded; } -static int has_failed(struct r5conf *conf) +static bool has_failed(struct r5conf *conf) { - int degraded; + int degraded = conf->mddev->degraded; - if (conf->mddev->reshape_position == MaxSector) - return conf->mddev->degraded > conf->max_degraded; + if (test_bit(MD_BROKEN, &conf->mddev->flags)) + return true; - degraded = raid5_calc_degraded(conf); - if (degraded > conf->max_degraded) - return 1; - return 0; + if (conf->mddev->reshape_position != MaxSector) + degraded = raid5_calc_degraded(conf); + + return degraded > conf->max_degraded; } struct stripe_head * @@ -2877,34 +2877,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) unsigned long flags; pr_debug("raid456: error called\n"); + pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n", + mdname(mddev), bdevname(rdev->bdev, b)); + spin_lock_irqsave(&conf->device_lock, flags); + set_bit(Faulty, &rdev->flags); + clear_bit(In_sync, &rdev->flags); + mddev->degraded = raid5_calc_degraded(conf); - if (test_bit(In_sync, &rdev->flags) && - mddev->degraded == conf->max_degraded) { - /* - * Don't allow to achieve failed state - * Don't try to recover this device - */ + if (has_failed(conf)) { + set_bit(MD_BROKEN, &conf->mddev->flags); conf->recovery_disabled = mddev->recovery_disabled; - spin_unlock_irqrestore(&conf->device_lock, flags); - return; + + pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n", + mdname(mddev), mddev->degraded, conf->raid_disks); + } else { + pr_crit("md/raid:%s: Operation continuing on %d devices.\n", + mdname(mddev), conf->raid_disks - mddev->degraded); } - set_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irqrestore(&conf->device_lock, flags); set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); - pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n" - "md/raid:%s: Operation continuing on %d devices.\n", - mdname(mddev), - bdevname(rdev->bdev, b), - mdname(mddev), - conf->raid_disks - mddev->degraded); r5c_update_on_rdev_error(mddev, rdev); } -- GitLab From 58cf68a1886d14ffdc5c892ce483a82156769e88 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 12 May 2022 20:38:37 -0700 Subject: [PATCH 0326/4335] fs/ntfs3: validate BOOT sectors_per_clusters commit a3b774342fa752a5290c0de36375289dfcf4a260 upstream. When the NTFS BOOT sectors_per_clusters field is > 0x80, it represents a shift value. Make sure that the shift value is not too large before using it (NTFS max cluster size is 2MB). Return -EVINVAL if it too large. This prevents negative shift values and shift values that are larger than the field size. Prevents this UBSAN error: UBSAN: shift-out-of-bounds in ../fs/ntfs3/super.c:673:16 shift exponent -192 is negative Link: https://lkml.kernel.org/r/20220502175342.20296-1-rdunlap@infradead.org Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") Signed-off-by: Randy Dunlap Reported-by: syzbot+1631f09646bc214d2e76@syzkaller.appspotmail.com Reviewed-by: Namjae Jeon Cc: Konstantin Komarov Cc: Alexander Viro Cc: Kari Argillander Cc: Namjae Jeon Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index d41d76979e121..7f85ec83e196f 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -668,9 +668,11 @@ static u32 format_size_gb(const u64 bytes, u32 *mb) static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) { - return boot->sectors_per_clusters <= 0x80 - ? boot->sectors_per_clusters - : (1u << (0 - boot->sectors_per_clusters)); + if (boot->sectors_per_clusters <= 0x80) + return boot->sectors_per_clusters; + if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */ + return 1U << (0 - boot->sectors_per_clusters); + return -EINVAL; } /* @@ -713,6 +715,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, /* cluster size: 512, 1K, 2K, 4K, ... 2M */ sct_per_clst = true_sectors_per_clst(boot); + if ((int)sct_per_clst < 0) + goto out; if (!is_power_of_2(sct_per_clst)) goto out; -- GitLab From 18242f3428381c30f6bd04ce3c4a0a5f53a163a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ma=C5=9Blanka?= Date: Tue, 5 Apr 2022 17:04:07 +0200 Subject: [PATCH 0327/4335] HID: multitouch: Add support for Google Whiskers Touchpad commit 1d07cef7fd7599450b3d03e1915efc2a96e1f03f upstream. The Google Whiskers touchpad does not work properly with the default multitouch configuration. Instead, use the same configuration as Google Rose. Signed-off-by: Marek Maslanka Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-multitouch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e1afddb7b33d8..e45a4350ab72f 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2176,6 +2176,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, + { .driver_data = MT_CLS_GOOGLE, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, + USB_DEVICE_ID_GOOGLE_WHISKERS) }, /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, -- GitLab From 12b635b4e349e6d03a2ec698f946073f62770892 Mon Sep 17 00:00:00 2001 From: Tao Jin Date: Sun, 3 Apr 2022 12:57:44 -0400 Subject: [PATCH 0328/4335] HID: multitouch: add quirks to enable Lenovo X12 trackpoint commit 95cd2cdc88c755dcd0a58b951faeb77742c733a4 upstream. This applies the similar quirks used by previous generation devices such as X1 tablet for X12 tablet, so that the trackpoint and buttons can work. This patch was applied and tested working on 5.17.1 . Cc: stable@vger.kernel.org # 5.8+ given that it relies on 40d5bb87377a Signed-off-by: Tao Jin Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/CO6PR03MB6241CB276FCDC7F4CEDC34F6E1E29@CO6PR03MB6241.namprd03.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 645a5f566d233..42b5b050b72d1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -753,6 +753,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 +#define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe #define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e45a4350ab72f..f382444dc2dba 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2032,6 +2032,12 @@ static const struct hid_device_id mt_devices[] = { USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, + /* Lenovo X12 TAB Gen 1 */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X12_TAB) }, + /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, -- GitLab From cdf828c11c127b14344f4b2ac9826f40538528f3 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 May 2022 14:50:57 -0700 Subject: [PATCH 0329/4335] x86/sgx: Disconnect backing page references from dirty status commit 6bd429643cc265e94a9d19839c771bcc5d008fa8 upstream. SGX uses shmem backing storage to store encrypted enclave pages and their crypto metadata when enclave pages are moved out of enclave memory. Two shmem backing storage pages are associated with each enclave page - one backing page to contain the encrypted enclave page data and one backing page (shared by a few enclave pages) to contain the crypto metadata used by the processor to verify the enclave page when it is loaded back into the enclave. sgx_encl_put_backing() is used to release references to the backing storage and, optionally, mark both backing store pages as dirty. Managing references and dirty status together in this way results in both backing store pages marked as dirty, even if only one of the backing store pages are changed. Additionally, waiting until the page reference is dropped to set the page dirty risks a race with the page fault handler that may load outdated data into the enclave when a page is faulted right after it is reclaimed. Consider what happens if the reclaimer writes a page to the backing store and the page is immediately faulted back, before the reclaimer is able to set the dirty bit of the page: sgx_reclaim_pages() { sgx_vma_fault() { ... sgx_encl_get_backing(); ... ... sgx_reclaimer_write() { mutex_lock(&encl->lock); /* Write data to backing store */ mutex_unlock(&encl->lock); } mutex_lock(&encl->lock); __sgx_encl_eldu() { ... /* * Enclave backing store * page not released * nor marked dirty - * contents may not be * up to date. */ sgx_encl_get_backing(); ... /* * Enclave data restored * from backing store * and PCMD pages that * are not up to date. * ENCLS[ELDU] faults * because of MAC or PCMD * checking failure. */ sgx_encl_put_backing(); } ... /* set page dirty */ sgx_encl_put_backing(); ... mutex_unlock(&encl->lock); } } Remove the option to sgx_encl_put_backing() to set the backing pages as dirty and set the needed pages as dirty right after receiving important data while enclave mutex is held. This ensures that the page fault handler can get up to date data from a page and prepares the code for a following change where only one of the backing pages need to be marked as dirty. Cc: stable@vger.kernel.org Fixes: 1728ab54b4be ("x86/sgx: Add a page reclaimer") Suggested-by: Dave Hansen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Haitao Huang Link: https://lore.kernel.org/linux-sgx/8922e48f-6646-c7cc-6393-7c78dcf23d23@intel.com/ Link: https://lkml.kernel.org/r/fa9f98986923f43e72ef4c6702a50b2a0b3c42e3.1652389823.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/encl.c | 10 ++-------- arch/x86/kernel/cpu/sgx/encl.h | 2 +- arch/x86/kernel/cpu/sgx/main.c | 6 ++++-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 7c63a1911fae9..398695a206050 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -94,7 +94,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); - sgx_encl_put_backing(&b, false); + sgx_encl_put_backing(&b); sgx_encl_truncate_backing_page(encl, page_index); @@ -645,15 +645,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, /** * sgx_encl_put_backing() - Unpin the backing storage * @backing: data for accessing backing storage for the page - * @do_write: mark pages dirty */ -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write) +void sgx_encl_put_backing(struct sgx_backing *backing) { - if (do_write) { - set_page_dirty(backing->pcmd); - set_page_dirty(backing->contents); - } - put_page(backing->pcmd); put_page(backing->contents); } diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index fec43ca65065b..d44e7372151f0 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -107,7 +107,7 @@ void sgx_encl_release(struct kref *ref); int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing); -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); +void sgx_encl_put_backing(struct sgx_backing *backing); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 8471a8b9b48e8..c8b11280db451 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -170,6 +170,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot, backing->pcmd_offset; ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); + set_page_dirty(backing->pcmd); + set_page_dirty(backing->contents); kunmap_atomic((void *)(unsigned long)(pginfo.metadata - backing->pcmd_offset)); @@ -299,7 +301,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; - sgx_encl_put_backing(&secs_backing, true); + sgx_encl_put_backing(&secs_backing); } out: @@ -392,7 +394,7 @@ skip: encl_page = epc_page->owner; sgx_reclaimer_write(epc_page, &backing[i]); - sgx_encl_put_backing(&backing[i], true); + sgx_encl_put_backing(&backing[i]); kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; -- GitLab From fd55a1707750a9c180050d363fe7341f9bc07935 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 May 2022 14:50:58 -0700 Subject: [PATCH 0330/4335] x86/sgx: Mark PCMD page as dirty when modifying contents commit 2154e1c11b7080aa19f47160bd26b6f39bbd7824 upstream. Recent commit 08999b2489b4 ("x86/sgx: Free backing memory after faulting the enclave page") expanded __sgx_encl_eldu() to clear an enclave page's PCMD (Paging Crypto MetaData) from the PCMD page in the backing store after the enclave page is restored to the enclave. Since the PCMD page in the backing store is modified the page should be marked as dirty to ensure the modified data is retained. Cc: stable@vger.kernel.org Fixes: 08999b2489b4 ("x86/sgx: Free backing memory after faulting the enclave page") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Haitao Huang Link: https://lkml.kernel.org/r/00cd2ac480db01058d112e347b32599c1a806bc4.1652389823.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/encl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 398695a206050..5104a428b72cd 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -84,6 +84,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, } memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + set_page_dirty(b.pcmd); /* * The area for the PCMD in the page was zeroed above. Check if the -- GitLab From b070e97fbd143bc972b167f8c47796d9d1db9e44 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 May 2022 14:50:59 -0700 Subject: [PATCH 0331/4335] x86/sgx: Obtain backing storage page with enclave mutex held commit 0e4e729a830c1e7f31d3b3fbf8feb355a402b117 upstream. Haitao reported encountering a WARN triggered by the ENCLS[ELDU] instruction faulting with a #GP. The WARN is encountered when the reclaimer evicts a range of pages from the enclave when the same pages are faulted back right away. The SGX backing storage is accessed on two paths: when there are insufficient free pages in the EPC the reclaimer works to move enclave pages to the backing storage and as enclaves access pages that have been moved to the backing storage they are retrieved from there as part of page fault handling. An oversubscribed SGX system will often run the reclaimer and page fault handler concurrently and needs to ensure that the backing store is accessed safely between the reclaimer and the page fault handler. This is not the case because the reclaimer accesses the backing store without the enclave mutex while the page fault handler accesses the backing store with the enclave mutex. Consider the scenario where a page is faulted while a page sharing a PCMD page with the faulted page is being reclaimed. The consequence is a race between the reclaimer and page fault handler, the reclaimer attempting to access a PCMD at the same time it is truncated by the page fault handler. This could result in lost PCMD data. Data may still be lost if the reclaimer wins the race, this is addressed in the following patch. The reclaimer accesses pages from the backing storage without holding the enclave mutex and runs the risk of concurrently accessing the backing storage with the page fault handler that does access the backing storage with the enclave mutex held. In the scenario below a PCMD page is truncated from the backing store after all its pages have been loaded in to the enclave at the same time the PCMD page is loaded from the backing store when one of its pages are reclaimed: sgx_reclaim_pages() { sgx_vma_fault() { ... mutex_lock(&encl->lock); ... __sgx_encl_eldu() { ... if (pcmd_page_empty) { /* * EPC page being reclaimed /* * shares a PCMD page with an * PCMD page truncated * enclave page that is being * while requested from * faulted in. * reclaimer. */ */ sgx_encl_get_backing() <----------> sgx_encl_truncate_backing_page() } mutex_unlock(&encl->lock); } } In this scenario there is a race between the reclaimer and the page fault handler when the reclaimer attempts to get access to the same PCMD page that is being truncated. This could result in the reclaimer writing to the PCMD page that is then truncated, causing the PCMD data to be lost, or in a new PCMD page being allocated. The lost PCMD data may still occur after protecting the backing store access with the mutex - this is fixed in the next patch. By ensuring the backing store is accessed with the mutex held the enclave page state can be made accurate with the SGX_ENCL_PAGE_BEING_RECLAIMED flag accurately reflecting that a page is in the process of being reclaimed. Consistently protect the reclaimer's backing store access with the enclave's mutex to ensure that it can safely run concurrently with the page fault handler. Cc: stable@vger.kernel.org Fixes: 1728ab54b4be ("x86/sgx: Add a page reclaimer") Reported-by: Haitao Huang Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Tested-by: Haitao Huang Link: https://lkml.kernel.org/r/fa2e04c561a8555bfe1f4e7adc37d60efc77387b.1652389823.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index c8b11280db451..00e09a2b933ac 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -289,6 +289,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_ewb(epc_page, backing); encl_page->epc_page = NULL; encl->secs_child_cnt--; + sgx_encl_put_backing(backing); if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), @@ -362,11 +363,14 @@ static void sgx_reclaim_pages(void) goto skip; page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); + + mutex_lock(&encl_page->encl->lock); ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); - if (ret) + if (ret) { + mutex_unlock(&encl_page->encl->lock); goto skip; + } - mutex_lock(&encl_page->encl->lock); encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; mutex_unlock(&encl_page->encl->lock); continue; @@ -394,7 +398,6 @@ skip: encl_page = epc_page->owner; sgx_reclaimer_write(epc_page, &backing[i]); - sgx_encl_put_backing(&backing[i]); kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; -- GitLab From cd69479425118c7f54e83b320f2fe5cf41bedfe9 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 May 2022 14:51:00 -0700 Subject: [PATCH 0332/4335] x86/sgx: Fix race between reclaimer and page fault handler commit af117837ceb9a78e995804ade4726ad2c2c8981f upstream. Haitao reported encountering a WARN triggered by the ENCLS[ELDU] instruction faulting with a #GP. The WARN is encountered when the reclaimer evicts a range of pages from the enclave when the same pages are faulted back right away. Consider two enclave pages (ENCLAVE_A and ENCLAVE_B) sharing a PCMD page (PCMD_AB). ENCLAVE_A is in the enclave memory and ENCLAVE_B is in the backing store. PCMD_AB contains just one entry, that of ENCLAVE_B. Scenario proceeds where ENCLAVE_A is being evicted from the enclave while ENCLAVE_B is faulted in. sgx_reclaim_pages() { ... /* * Reclaim ENCLAVE_A */ mutex_lock(&encl->lock); /* * Get a reference to ENCLAVE_A's * shmem page where enclave page * encrypted data will be stored * as well as a reference to the * enclave page's PCMD data page, * PCMD_AB. * Release mutex before writing * any data to the shmem pages. */ sgx_encl_get_backing(...); encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; mutex_unlock(&encl->lock); /* * Fault ENCLAVE_B */ sgx_vma_fault() { mutex_lock(&encl->lock); /* * Get reference to * ENCLAVE_B's shmem page * as well as PCMD_AB. */ sgx_encl_get_backing(...) /* * Load page back into * enclave via ELDU. */ /* * Release reference to * ENCLAVE_B' shmem page and * PCMD_AB. */ sgx_encl_put_backing(...); /* * PCMD_AB is found empty so * it and ENCLAVE_B's shmem page * are truncated. */ /* Truncate ENCLAVE_B backing page */ sgx_encl_truncate_backing_page(); /* Truncate PCMD_AB */ sgx_encl_truncate_backing_page(); mutex_unlock(&encl->lock); ... } mutex_lock(&encl->lock); encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED; /* * Write encrypted contents of * ENCLAVE_A to ENCLAVE_A shmem * page and its PCMD data to * PCMD_AB. */ sgx_encl_put_backing(...) /* * Reference to PCMD_AB is * dropped and it is truncated. * ENCLAVE_A's PCMD data is lost. */ mutex_unlock(&encl->lock); } What happens next depends on whether it is ENCLAVE_A being faulted in or ENCLAVE_B being evicted - but both end up with ENCLS[ELDU] faulting with a #GP. If ENCLAVE_A is faulted then at the time sgx_encl_get_backing() is called a new PCMD page is allocated and providing the empty PCMD data for ENCLAVE_A would cause ENCLS[ELDU] to #GP If ENCLAVE_B is evicted first then a new PCMD_AB would be allocated by the reclaimer but later when ENCLAVE_A is faulted the ENCLS[ELDU] instruction would #GP during its checks of the PCMD value and the WARN would be encountered. Noting that the reclaimer sets SGX_ENCL_PAGE_BEING_RECLAIMED at the time it obtains a reference to the backing store pages of an enclave page it is in the process of reclaiming, fix the race by only truncating the PCMD page after ensuring that no page sharing the PCMD page is in the process of being reclaimed. Cc: stable@vger.kernel.org Fixes: 08999b2489b4 ("x86/sgx: Free backing memory after faulting the enclave page") Reported-by: Haitao Huang Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Haitao Huang Link: https://lkml.kernel.org/r/ed20a5db516aa813873268e125680041ae11dfcf.1652389823.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/encl.c | 94 +++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 5104a428b72cd..243f3bd781453 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -12,6 +12,92 @@ #include "encls.h" #include "sgx.h" +#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd)) +/* + * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to + * determine the page index associated with the first PCMD entry + * within a PCMD page. + */ +#define PCMD_FIRST_MASK GENMASK(4, 0) + +/** + * reclaimer_writing_to_pcmd() - Query if any enclave page associated with + * a PCMD page is in process of being reclaimed. + * @encl: Enclave to which PCMD page belongs + * @start_addr: Address of enclave page using first entry within the PCMD page + * + * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is + * stored. The PCMD data of a reclaimed enclave page contains enough + * information for the processor to verify the page at the time + * it is loaded back into the Enclave Page Cache (EPC). + * + * The backing storage to which enclave pages are reclaimed is laid out as + * follows: + * Encrypted enclave pages:SECS page:PCMD pages + * + * Each PCMD page contains the PCMD metadata of + * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages. + * + * A PCMD page can only be truncated if it is (a) empty, and (b) not in the + * process of getting data (and thus soon being non-empty). (b) is tested with + * a check if an enclave page sharing the PCMD page is in the process of being + * reclaimed. + * + * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it + * intends to reclaim that enclave page - it means that the PCMD page + * associated with that enclave page is about to get some data and thus + * even if the PCMD page is empty, it should not be truncated. + * + * Context: Enclave mutex (&sgx_encl->lock) must be held. + * Return: 1 if the reclaimer is about to write to the PCMD page + * 0 if the reclaimer has no intention to write to the PCMD page + */ +static int reclaimer_writing_to_pcmd(struct sgx_encl *encl, + unsigned long start_addr) +{ + int reclaimed = 0; + int i; + + /* + * PCMD_FIRST_MASK is based on number of PCMD entries within + * PCMD page being 32. + */ + BUILD_BUG_ON(PCMDS_PER_PAGE != 32); + + for (i = 0; i < PCMDS_PER_PAGE; i++) { + struct sgx_encl_page *entry; + unsigned long addr; + + addr = start_addr + i * PAGE_SIZE; + + /* + * Stop when reaching the SECS page - it does not + * have a page_array entry and its reclaim is + * started and completed with enclave mutex held so + * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED + * flag. + */ + if (addr == encl->base + encl->size) + break; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + continue; + + /* + * VA page slot ID uses same bit as the flag so it is important + * to ensure that the page is not already in backing store. + */ + if (entry->epc_page && + (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) { + reclaimed = 1; + break; + } + } + + return reclaimed; +} + /* * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's * follow right after the EPC data in the backing storage. In addition to the @@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; pgoff_t page_index, page_pcmd_off; + unsigned long pcmd_first_page; struct sgx_pageinfo pginfo; struct sgx_backing b; bool pcmd_page_empty; @@ -58,6 +145,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + /* + * Address of enclave page using the first entry within the PCMD page. + */ + pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base; + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); ret = sgx_encl_get_backing(encl, page_index, &b); @@ -99,7 +191,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, sgx_encl_truncate_backing_page(encl, page_index); - if (pcmd_page_empty) + if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); return ret; -- GitLab From 9ebed8d283e508779c694bcf3208512cf44d4d8b Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 May 2022 14:51:01 -0700 Subject: [PATCH 0333/4335] x86/sgx: Ensure no data in PCMD page after truncate commit e3a3bbe3e99de73043a1d32d36cf4d211dc58c7e upstream. A PCMD (Paging Crypto MetaData) page contains the PCMD structures of enclave pages that have been encrypted and moved to the shmem backing store. When all enclave pages sharing a PCMD page are loaded in the enclave, there is no need for the PCMD page and it can be truncated from the backing store. A few issues appeared around the truncation of PCMD pages. The known issues have been addressed but the PCMD handling code could be made more robust by loudly complaining if any new issue appears in this area. Add a check that will complain with a warning if the PCMD page is not actually empty after it has been truncated. There should never be data in the PCMD page at this point since it is was just checked to be empty and truncated with enclave mutex held and is updated with the enclave mutex held. Suggested-by: Dave Hansen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Haitao Huang Link: https://lkml.kernel.org/r/6495120fed43fafc1496d09dd23df922b9a32709.1652389823.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/encl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 243f3bd781453..3c24e6124d955 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -187,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); + get_page(b.pcmd); sgx_encl_put_backing(&b); sgx_encl_truncate_backing_page(encl, page_index); - if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) + if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) { sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + pcmd_page = kmap_atomic(b.pcmd); + if (memchr_inv(pcmd_page, 0, PAGE_SIZE)) + pr_warn("PCMD page not empty after truncate.\n"); + kunmap_atomic(pcmd_page); + } + + put_page(b.pcmd); return ret; } -- GitLab From d207a2e2080b8054b71855767e0846268a737dd9 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 15 Apr 2022 13:59:51 +0200 Subject: [PATCH 0334/4335] media: i2c: imx412: Fix reset GPIO polarity commit bb25f071fc92d3d227178a45853347c7b3b45a6b upstream. The imx412/imx577 sensor has a reset line that is active low not active high. Currently the logic for this is inverted. The right way to define the reset line is to declare it active low in the DTS and invert the logic currently contained in the driver. The DTS should represent the hardware does i.e. reset is active low. So: + reset-gpios = <&tlmm 78 GPIO_ACTIVE_LOW>; not: - reset-gpios = <&tlmm 78 GPIO_ACTIVE_HIGH>; I was a bit reticent about changing this logic since I thought it might negatively impact @intel.com users. Googling a bit though I believe this sensor is used on "Keem Bay" which is clearly a DTS based system and is not upstream yet. Fixes: 9214e86c0cc1 ("media: i2c: Add imx412 camera sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Jacopo Mondi Reviewed-by: Daniele Alessandrelli Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/imx412.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx412.c b/drivers/media/i2c/imx412.c index be3f6ea555597..e6be6b4250f5a 100644 --- a/drivers/media/i2c/imx412.c +++ b/drivers/media/i2c/imx412.c @@ -1011,7 +1011,7 @@ static int imx412_power_on(struct device *dev) struct imx412 *imx412 = to_imx412(sd); int ret; - gpiod_set_value_cansleep(imx412->reset_gpio, 1); + gpiod_set_value_cansleep(imx412->reset_gpio, 0); ret = clk_prepare_enable(imx412->inclk); if (ret) { @@ -1024,7 +1024,7 @@ static int imx412_power_on(struct device *dev) return 0; error_reset: - gpiod_set_value_cansleep(imx412->reset_gpio, 0); + gpiod_set_value_cansleep(imx412->reset_gpio, 1); return ret; } @@ -1040,7 +1040,7 @@ static int imx412_power_off(struct device *dev) struct v4l2_subdev *sd = dev_get_drvdata(dev); struct imx412 *imx412 = to_imx412(sd); - gpiod_set_value_cansleep(imx412->reset_gpio, 0); + gpiod_set_value_cansleep(imx412->reset_gpio, 1); clk_disable_unprepare(imx412->inclk); -- GitLab From 718ff5fc7e1cbd385faa21ccdf80a2dfc8d388f7 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 15 Apr 2022 13:59:52 +0200 Subject: [PATCH 0335/4335] media: i2c: imx412: Fix power_off ordering commit 9a199694c6a1519522ec73a4571f68abe9f13d5d upstream. The enable path does - gpio - clock The disable path does - gpio - clock Fix the order on the power-off path so that power-off and power-on have the same ordering for clock and gpio. Fixes: 9214e86c0cc1 ("media: i2c: Add imx412 camera sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Jacopo Mondi Reviewed-by: Daniele Alessandrelli Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/imx412.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/imx412.c b/drivers/media/i2c/imx412.c index e6be6b4250f5a..84279a6808730 100644 --- a/drivers/media/i2c/imx412.c +++ b/drivers/media/i2c/imx412.c @@ -1040,10 +1040,10 @@ static int imx412_power_off(struct device *dev) struct v4l2_subdev *sd = dev_get_drvdata(dev); struct imx412 *imx412 = to_imx412(sd); - gpiod_set_value_cansleep(imx412->reset_gpio, 1); - clk_disable_unprepare(imx412->inclk); + gpiod_set_value_cansleep(imx412->reset_gpio, 1); + return 0; } -- GitLab From b5745954a993a81b0d593d8ad60a349c45802d4c Mon Sep 17 00:00:00 2001 From: Stefan Mahnke-Hartmann Date: Fri, 13 May 2022 15:41:51 +0200 Subject: [PATCH 0336/4335] tpm: Fix buffer access in tpm2_get_tpm_pt() commit e57b2523bd37e6434f4e64c7a685e3715ad21e9a upstream. Under certain conditions uninitialized memory will be accessed. As described by TCG Trusted Platform Module Library Specification, rev. 1.59 (Part 3: Commands), if a TPM2_GetCapability is received, requesting a capability, the TPM in field upgrade mode may return a zero length list. Check the property count in tpm2_get_tpm_pt(). Fixes: 2ab3241161b3 ("tpm: migrate tpm2_get_tpm_pt() to use struct tpm_buf") Cc: stable@vger.kernel.org Signed-off-by: Stefan Mahnke-Hartmann Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm2-cmd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index a25815a6f6253..de92065394be9 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -400,7 +400,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, if (!rc) { out = (struct tpm2_get_cap_out *) &buf.data[TPM_HEADER_SIZE]; - *value = be32_to_cpu(out->value); + /* + * To prevent failing boot up of some systems, Infineon TPM2.0 + * returns SUCCESS on TPM2_Startup in field upgrade mode. Also + * the TPM2_Getcapability command returns a zero length list + * in field upgrade mode. + */ + if (be32_to_cpu(out->property_cnt) > 0) + *value = be32_to_cpu(out->value); + else + rc = -ENODATA; } tpm_buf_destroy(&buf); return rc; -- GitLab From 581b2ed60535c432282534a46159d29594108061 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 18 Mar 2022 14:02:01 +0800 Subject: [PATCH 0337/4335] tpm: ibmvtpm: Correct the return value in tpm_ibmvtpm_probe() commit d0dc1a7100f19121f6e7450f9cdda11926aa3838 upstream. Currently it returns zero when CRQ response timed out, it should return an error code instead. Fixes: d8d74ea3c002 ("tpm: ibmvtpm: Wait for buffer to be set before proceeding") Signed-off-by: Xiu Jianfeng Reviewed-by: Stefan Berger Acked-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 3af4c07a9342f..d3989b257f422 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -681,6 +681,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, if (!wait_event_timeout(ibmvtpm->crq_queue.wq, ibmvtpm->rtce_buf != NULL, HZ)) { + rc = -ENODEV; dev_err(dev, "CRQ response timed out\n"); goto init_irq_cleanup; } -- GitLab From 9bc73bbd559711b294812307fd00383bae1535f4 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 27 Apr 2022 18:28:39 +0900 Subject: [PATCH 0338/4335] docs: submitting-patches: Fix crossref to 'The canonical patch format' commit 6d5aa418b3bd42cdccc36e94ee199af423ef7c84 upstream. The reference to `explicit_in_reply_to` is pointless as when the reference was added in the form of "#15" [1], Section 15) was "The canonical patch format". The reference of "#15" had not been properly updated in a couple of reorganizations during the plain-text SubmittingPatches era. Fix it by using `the_canonical_patch_format`. [1]: 2ae19acaa50a ("Documentation: Add "how to write a good patch summary" to SubmittingPatches") Signed-off-by: Akira Yokosawa Fixes: 5903019b2a5e ("Documentation/SubmittingPatches: convert it to ReST markup") Fixes: 9b2c76777acc ("Documentation/SubmittingPatches: enrich the Sphinx output") Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Cc: stable@vger.kernel.org # v4.9+ Link: https://lore.kernel.org/r/64e105a5-50be-23f2-6cae-903a2ea98e18@gmail.com Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/process/submitting-patches.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 8ad6b93f91e6d..025272139539c 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -72,7 +72,7 @@ as you intend it to. The maintainer will thank you if you write your patch description in a form which can be easily pulled into Linux's source code management -system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`. +system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`. Solve only one problem per patch. If your description starts to get long, that's a sign that you probably need to split up your patch. -- GitLab From fa1c51c82c0e72c101e62448df26c57b8fb10cbd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:10 -0400 Subject: [PATCH 0339/4335] NFS: Memory allocation failures are not server fatal errors commit 452284407c18d8a522c3039339b1860afa0025a8 upstream. We need to filter out ENOMEM in nfs_error_is_fatal_on_server(), because running out of memory on our client is not a server error. Reported-by: Olga Kornievskaia Fixes: 2dc23afffbca ("NFS: ENOMEM should also be a fatal error.") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c8845242d4225..fbfe293af72c9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -834,6 +834,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) case 0: case -ERESTARTSYS: case -EINTR: + case -ENOMEM: return false; } return nfs_error_is_fatal(err); -- GitLab From e2fc17fcc503cfca57b5d1dd3b646ca7eebead97 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 May 2022 19:06:13 -0400 Subject: [PATCH 0340/4335] NFSD: Fix possible sleep during nfsd4_release_lockowner() commit ce3c4ad7f4ce5db7b4f08a1e237d8dd94b39180b upstream. nfsd4_release_lockowner() holds clp->cl_lock when it calls check_for_locks(). However, check_for_locks() calls nfsd_file_get() / nfsd_file_put() to access the backing inode's flc_posix list, and nfsd_file_put() can sleep if the inode was recently removed. Let's instead rely on the stateowner's reference count to gate whether the release is permitted. This should be a reliable indication of locks-in-use since file lock operations and ->lm_get_owner take appropriate references, which are released appropriately when file locks are removed. Reported-by: Dai Ngo Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 181bc3d9f5663..f9e2fa9cfbec5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7299,16 +7299,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - /* see if there are still any locks associated with it */ - lo = lockowner(sop); - list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { - status = nfserr_locks_held; - spin_unlock(&clp->cl_lock); - return status; - } + if (atomic_read(&sop->so_count) != 1) { + spin_unlock(&clp->cl_lock); + return nfserr_locks_held; } + lo = lockowner(sop); nfs4_get_stateowner(sop); break; } -- GitLab From e36452d5da6325df7c10cffc60a9e68d21e2606d Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Sat, 30 Apr 2022 21:08:03 +0800 Subject: [PATCH 0341/4335] bpf: Fix potential array overflow in bpf_trampoline_get_progs() commit a2aa95b71c9bbec793b5c5fa50f0a80d882b3e8d upstream. The cnt value in the 'cnt >= BPF_MAX_TRAMP_PROGS' check does not include BPF_TRAMP_MODIFY_RETURN bpf programs, so the number of the attached BPF_TRAMP_MODIFY_RETURN bpf programs in a trampoline can exceed BPF_MAX_TRAMP_PROGS. When this happens, the assignment '*progs++ = aux->prog' in bpf_trampoline_get_progs() will cause progs array overflow as the progs field in the bpf_tramp_progs struct can only hold at most BPF_MAX_TRAMP_PROGS bpf programs. Fixes: 88fd9e5352fe ("bpf: Refactor trampoline update code") Signed-off-by: Yuntao Wang Link: https://lore.kernel.org/r/20220430130803.210624-1-ytcoode@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/trampoline.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 2660fbced9ad4..4fa75791b45e2 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -414,7 +414,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err = 0; - int cnt; + int cnt = 0, i; kind = bpf_attach_type_to_tramp(prog); mutex_lock(&tr->mutex); @@ -425,7 +425,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) err = -EBUSY; goto out; } - cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + cnt += tr->progs_cnt[i]; + if (kind == BPF_TRAMP_REPLACE) { /* Cannot attach extension if fentry/fexit are in use. */ if (cnt) { @@ -503,16 +506,19 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { + int i; + if (!tr) return; mutex_lock(&trampoline_mutex); if (!refcount_dec_and_test(&tr->refcnt)) goto out; WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) - goto out; - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) - goto out; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) + goto out; + /* This code will be executed even when the last bpf_tramp_image * is alive. All progs are detached from the trampoline and the * trampoline image is patched with jmp into epilogue to skip -- GitLab From 77f8c4a5f3d0c6f1adefb44f1f8aa02de73eac2c Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 16 Apr 2022 18:57:59 +0800 Subject: [PATCH 0342/4335] bpf: Enlarge offset check value to INT_MAX in bpf_skb_{load,store}_bytes commit 45969b4152c1752089351cd6836a42a566d49bcf upstream. The data length of skb frags + frag_list may be greater than 0xffff, and skb_header_pointer can not handle negative offset. So, here INT_MAX is used to check the validity of offset. Add the same change to the related function skb_store_bytes. Fixes: 05c74e5e53f6 ("bpf: add bpf_skb_load_bytes helper") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220416105801.88708-2-liujian56@huawei.com Signed-off-by: Greg Kroah-Hartman --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 821278b906b71..707e2e48d7691 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1688,7 +1688,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; @@ -1723,7 +1723,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, { void *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); -- GitLab From 51f6657e94439f303c731b57734b9dc255687dff Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Thu, 7 Apr 2022 21:04:23 +0800 Subject: [PATCH 0343/4335] bpf: Fix excessive memory allocation in stack_map_alloc() commit b45043192b3e481304062938a6561da2ceea46a6 upstream. The 'n_buckets * (value_size + sizeof(struct stack_map_bucket))' part of the allocated memory for 'smap' is never used after the memlock accounting was removed, thus get rid of it. [ Note, Daniel: Commit b936ca643ade ("bpf: rework memlock-based memory accounting for maps") moved `cost += n_buckets * (value_size + sizeof(struct stack_map_bucket))` up and therefore before the bpf_map_area_alloc() allocation, sigh. In a later step commit c85d69135a91 ("bpf: move memory size checks to bpf_map_charge_init()"), and the overflow checks of `cost >= U32_MAX - PAGE_SIZE` moved into bpf_map_charge_init(). And then 370868107bf6 ("bpf: Eliminate rlimit-based memory accounting for stackmap maps") finally removed the bpf_map_charge_init(). Anyway, the original code did the allocation same way as /after/ this fix. ] Fixes: b936ca643ade ("bpf: rework memlock-based memory accounting for maps") Signed-off-by: Yuntao Wang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220407130423.798386-1-ytcoode@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/stackmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index fc0f77f91224b..7efae3af62017 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -119,7 +119,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); - cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) return ERR_PTR(-ENOMEM); -- GitLab From 5d0bba8232bf22ce13747cbfc8f696318ff01a50 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 19 Mar 2022 13:38:25 +0530 Subject: [PATCH 0344/4335] bpf: Reject writes for PTR_TO_MAP_KEY in check_helper_mem_access commit 7b3552d3f9f6897851fc453b5131a967167e43c2 upstream. It is not permitted to write to PTR_TO_MAP_KEY, but the current code in check_helper_mem_access would allow for it, reject this case as well, as helpers taking ARG_PTR_TO_UNINIT_MEM also take PTR_TO_MAP_KEY. Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220319080827.73251-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d2b119b4fbe74..db5f56643b621 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4587,6 +4587,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } return check_mem_region_access(env, regno, reg->off, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: -- GitLab From 6099a6c8a749a5c8d5f8b4c4342022a92072a02b Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 19 Mar 2022 13:38:24 +0530 Subject: [PATCH 0345/4335] bpf: Check PTR_TO_MEM | MEM_RDONLY in check_helper_mem_access commit 97e6d7dab1ca4648821c790a2b7913d6d5d549db upstream. The commit being fixed was aiming to disallow users from incorrectly obtaining writable pointer to memory that is only meant to be read. This is enforced now using a MEM_RDONLY flag. For instance, in case of global percpu variables, when the BTF type is not struct (e.g. bpf_prog_active), the verifier marks register type as PTR_TO_MEM | MEM_RDONLY from bpf_this_cpu_ptr or bpf_per_cpu_ptr helpers. However, when passing such pointer to kfunc, global funcs, or BPF helpers, in check_helper_mem_access, there is no expectation MEM_RDONLY flag will be set, hence it is checked as pointer to writable memory. Later, verifier sets up argument type of global func as PTR_TO_MEM | PTR_MAYBE_NULL, so user can use a global func to get around the limitations imposed by this flag. This check will also cover global non-percpu variables that may be introduced in kernel BTF in future. Also, we update the log message for PTR_TO_BUF case to be similar to PTR_TO_MEM case, so that the reason for error is clear to user. Fixes: 34d3a78c681e ("bpf: Make per_cpu_ptr return rdonly PTR_TO_MEM.") Reviewed-by: Hao Luo Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220319080827.73251-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db5f56643b621..25ee8d9572c6f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4602,13 +4602,23 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MEM: + if (type_is_rdonly_mem(reg->type)) { + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + } return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); case PTR_TO_BUF: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); return -EACCES; + } buf_info = "rdonly"; max_access = &env->prog->aux->max_rdonly_access; -- GitLab From 37fad50e16ff9d43f46c12bc297132a0277b3b8e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 31 May 2022 15:07:49 +0200 Subject: [PATCH 0346/4335] ALSA: usb-audio: Optimize TEAC clock quirk commit 3753fcc22974affa26160ce1c46a6ebaaaa86758 upstream. Maris found out that the quirk for TEAC devices to work around the clock setup is needed to apply only when the base clock is changed, e.g. from 48000-based clocks (48000, 96000, 192000, 384000) to 44100-based clocks (44100, 88200, 176400, 352800), or vice versa, while switching to another clock with the same base clock doesn't need the (forcible) interface setup. This patch implements the optimization for the TEAC clock quirk to avoid the unnecessary interface re-setup. Fixes: 5ce0b06ae5e6 ("ALSA: usb-audio: Workaround for clock setup on TEAC devices") Reported-by: Maris Abele Cc: Link: https://lore.kernel.org/r/20220531130749.30357-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/clock.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index a87aec3ac242e..ccca9efa7d33f 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -573,10 +573,14 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, } /* FIXME - TEAC devices require the immediate interface setup */ - if (rate != prev_rate && USB_ID_VENDOR(chip->usb_id) == 0x0644) { - usb_set_interface(chip->dev, fmt->iface, fmt->altsetting); - if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) - msleep(50); + if (USB_ID_VENDOR(chip->usb_id) == 0x0644) { + bool cur_base_48k = (rate % 48000 == 0); + bool prev_base_48k = (prev_rate % 48000 == 0); + if (cur_base_48k != prev_base_48k) { + usb_set_interface(chip->dev, fmt->iface, fmt->altsetting); + if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) + msleep(50); + } } validation: -- GitLab From 207ca688162d4d77129981a8b4352114b97a52b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Jun 2022 08:43:42 +0200 Subject: [PATCH 0347/4335] Linux 5.15.45 Link: https://lore.kernel.org/r/20220603173820.663747061@linuxfoundation.org Tested-by: Fox Chen Tested-by: Sudip Mukherjee Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Ron Economos Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b8ce2ba174862..e58d682071a89 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 44 +SUBLEVEL = 45 EXTRAVERSION = NAME = Trick or Treat -- GitLab From fced6c0622da59939832d7b72e61e6e95925330a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Apr 2022 11:10:18 +0200 Subject: [PATCH 0348/4335] binfmt_flat: do not stop relocating GOT entries prematurely on riscv commit 6045ab5fea4c849153ebeb0acb532da5f29d69c4 upstream. bFLT binaries are usually created using elf2flt. The linker script used by elf2flt has defined the .data section like the following for the last 19 years: .data : { _sdata = . ; __data_start = . ; data_start = . ; *(.got.plt) *(.got) FILL(0) ; . = ALIGN(0x20) ; LONG(-1) . = ALIGN(0x20) ; ... } It places the .got.plt input section before the .got input section. The same is true for the default linker script (ld --verbose) on most architectures except x86/x86-64. The binfmt_flat loader should relocate all GOT entries until it encounters a -1 (the LONG(-1) in the linker script). The problem is that the .got.plt input section starts with a GOTPLT header (which has size 16 bytes on elf64-riscv and 8 bytes on elf32-riscv), where the first word is set to -1. See the binutils implementation for riscv [1]. This causes the binfmt_flat loader to stop relocating GOT entries prematurely and thus causes the application to crash when running. Fix this by skipping the whole GOTPLT header, since the whole GOTPLT header is reserved for the dynamic linker. The GOTPLT header will only be skipped for bFLT binaries with flag FLAT_FLAG_GOTPIC set. This flag is unconditionally set by elf2flt if the supplied ELF binary has the symbol _GLOBAL_OFFSET_TABLE_ defined. ELF binaries without a .got input section should thus remain unaffected. Tested on RISC-V Canaan Kendryte K210 and RISC-V QEMU nommu_virt_defconfig. [1] https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfnn-riscv.c;hb=binutils-2_38#l3275 Cc: Signed-off-by: Niklas Cassel Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20220414091018.896737-1-niklas.cassel@wdc.com Fixed-by: kernel test robot Link: https://lore.kernel.org/lkml/202204182333.OIUOotK8-lkp@intel.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_flat.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 5d776f80ee50c..7ca3e0db06ffa 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -433,6 +433,30 @@ static void old_reloc(unsigned long rl) /****************************************************************************/ +static inline u32 __user *skip_got_header(u32 __user *rp) +{ + if (IS_ENABLED(CONFIG_RISCV)) { + /* + * RISC-V has a 16 byte GOT PLT header for elf64-riscv + * and 8 byte GOT PLT header for elf32-riscv. + * Skip the whole GOT PLT header, since it is reserved + * for the dynamic linker (ld.so). + */ + u32 rp_val0, rp_val1; + + if (get_user(rp_val0, rp)) + return rp; + if (get_user(rp_val1, rp + 1)) + return rp; + + if (rp_val0 == 0xffffffff && rp_val1 == 0xffffffff) + rp += 4; + else if (rp_val0 == 0xffffffff) + rp += 2; + } + return rp; +} + static int load_flat_file(struct linux_binprm *bprm, struct lib_info *libinfo, int id, unsigned long *extra_stack) { @@ -782,7 +806,8 @@ static int load_flat_file(struct linux_binprm *bprm, * image. */ if (flags & FLAT_FLAG_GOTPIC) { - for (rp = (u32 __user *)datapos; ; rp++) { + rp = skip_got_header((u32 __user *) datapos); + for (; ; rp++) { u32 addr, rp_val; if (get_user(rp_val, rp)) return -EFAULT; -- GitLab From 8b8fe78cae1d09229f544f4129b8c87ca7a8f367 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Jun 2022 13:50:44 +0200 Subject: [PATCH 0349/4335] parisc/stifb: Implement fb_is_primary_device() commit cf936af790a3ef5f41ff687ec91bfbffee141278 upstream. Implement fb_is_primary_device() function, so that fbcon detects if this framebuffer belongs to the default graphics card which was used to start the system. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/fb.h | 4 ++++ drivers/video/console/sticore.c | 17 +++++++++++++++++ drivers/video/fbdev/stifb.c | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index c4cd6360f9964..d63a2acb91f2b 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,9 +12,13 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } +#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +int fb_is_primary_device(struct fb_info *info); +#else static inline int fb_is_primary_device(struct fb_info *info) { return 0; } +#endif #endif /* _ASM_FB_H_ */ diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index f869b723494f1..62005064911b6 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "../fbdev/sticore.h" @@ -1127,6 +1128,22 @@ int sti_call(const struct sti_struct *sti, unsigned long func, return ret; } +/* check if given fb_info is the primary device */ +int fb_is_primary_device(struct fb_info *info) +{ + struct sti_struct *sti; + + sti = sti_get_rom(0); + + /* if no built-in graphics card found, allow any fb driver as default */ + if (!sti) + return true; + + /* return true if it's the default built-in framebuffer driver */ + return (sti->info == info); +} +EXPORT_SYMBOL(fb_is_primary_device); + MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer"); MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 265865610edc6..002f265d8db58 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1317,11 +1317,11 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) goto out_err3; } + /* save for primary gfx device detection & unregister_framebuffer() */ + sti->info = info; if (register_framebuffer(&fb->info) < 0) goto out_err4; - sti->info = info; /* save for unregister_framebuffer() */ - fb_info(&fb->info, "%s %dx%d-%d frame buffer device, %s, id: %04x, mmio: 0x%04lx\n", fix->id, var->xres, -- GitLab From e639a66ca9b846557dcf90823db99cc858d76e80 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Jun 2022 13:55:26 +0200 Subject: [PATCH 0350/4335] parisc/stifb: Keep track of hardware path of graphics card commit b046f984814af7985f444150ec28716d42d00d9a upstream. Keep the pa_path (hardware path) of the graphics card in sti_struct and use this info to give more useful info which card is currently being used. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/sticon.c | 5 ++++- drivers/video/console/sticore.c | 15 +++++++-------- drivers/video/fbdev/sticore.h | 3 +++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 40496e9e9b438..f304163e87e99 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -392,7 +393,9 @@ static int __init sticonsole_init(void) for (i = 0; i < MAX_NR_CONSOLES; i++) font_data[i] = STI_DEF_FONT; - pr_info("sticon: Initializing STI text console.\n"); + pr_info("sticon: Initializing STI text console on %s at [%s]\n", + sticon_sti->sti_data->inq_outptr.dev_name, + sticon_sti->pa_path); console_lock(); err = do_take_over_console(&sti_con, 0, MAX_NR_CONSOLES - 1, PAGE0->mem_cons.cl_class != CL_DUPLEX); diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index 62005064911b6..6a947ff96d6eb 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -34,7 +34,7 @@ #include "../fbdev/sticore.h" -#define STI_DRIVERVERSION "Version 0.9b" +#define STI_DRIVERVERSION "Version 0.9c" static struct sti_struct *default_sti __read_mostly; @@ -503,7 +503,7 @@ sti_select_fbfont(struct sti_cooked_rom *cooked_rom, const char *fbfont_name) if (!fbfont) return NULL; - pr_info("STI selected %ux%u framebuffer font %s for sticon\n", + pr_info(" using %ux%u framebuffer font %s\n", fbfont->width, fbfont->height, fbfont->name); bpc = ((fbfont->width+7)/8) * fbfont->height; @@ -947,6 +947,7 @@ out_err: static void sticore_check_for_default_sti(struct sti_struct *sti, char *path) { + pr_info(" located at [%s]\n", sti->pa_path); if (strcmp (path, default_sti_path) == 0) default_sti = sti; } @@ -958,7 +959,6 @@ static void sticore_check_for_default_sti(struct sti_struct *sti, char *path) */ static int __init sticore_pa_init(struct parisc_device *dev) { - char pa_path[21]; struct sti_struct *sti = NULL; int hpa = dev->hpa.start; @@ -971,8 +971,8 @@ static int __init sticore_pa_init(struct parisc_device *dev) if (!sti) return 1; - print_pa_hwpath(dev, pa_path); - sticore_check_for_default_sti(sti, pa_path); + print_pa_hwpath(dev, sti->pa_path); + sticore_check_for_default_sti(sti, sti->pa_path); return 0; } @@ -1008,9 +1008,8 @@ static int sticore_pci_init(struct pci_dev *pd, const struct pci_device_id *ent) sti = sti_try_rom_generic(rom_base, fb_base, pd); if (sti) { - char pa_path[30]; - print_pci_hwpath(pd, pa_path); - sticore_check_for_default_sti(sti, pa_path); + print_pci_hwpath(pd, sti->pa_path); + sticore_check_for_default_sti(sti, sti->pa_path); } if (!sti) { diff --git a/drivers/video/fbdev/sticore.h b/drivers/video/fbdev/sticore.h index c338f7848ae2b..0ebdd28a0b813 100644 --- a/drivers/video/fbdev/sticore.h +++ b/drivers/video/fbdev/sticore.h @@ -370,6 +370,9 @@ struct sti_struct { /* pointer to all internal data */ struct sti_all_data *sti_data; + + /* pa_path of this device */ + char pa_path[24]; }; -- GitLab From 87fd2d27c9ead8fc0764acdbd3ab5dd105d5800a Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 18 May 2022 09:34:28 +0800 Subject: [PATCH 0351/4335] RISC-V: Mark IORESOURCE_EXCLUSIVE for reserved mem instead of IORESOURCE_BUSY commit e61bf5c071148c80d091f8e7220b3b9130780ae3 upstream. Commit 00ab027a3b82 ("RISC-V: Add kernel image sections to the resource tree") marked IORESOURCE_BUSY for reserved memory, which caused resource map failed in subsequent operations of related driver, so remove the IORESOURCE_BUSY flag. In order to prohibit userland mapping reserved memory, mark IORESOURCE_EXCLUSIVE for it. The code to reproduce the issue, dts: mem0: memory@a0000000 { reg = <0x0 0xa0000000 0 0x1000000>; no-map; }; &test { status = "okay"; memory-region = <&mem0>; }; code: np = of_parse_phandle(pdev->dev.of_node, "memory-region", 0); ret = of_address_to_resource(np, 0, &r); base = devm_ioremap_resource(&pdev->dev, &r); // base = -EBUSY Fixes: 00ab027a3b82 ("RISC-V: Add kernel image sections to the resource tree") Reported-by: Huaming Jiang Reviewed-by: Guo Ren Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Co-developed-by: Nick Kossifidis Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/20220518013428.1338983-1-xianting.tian@linux.alibaba.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 6c5caf5eb9061..50c089730e570 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -199,7 +199,7 @@ static void __init init_resources(void) res = &mem_res[res_idx--]; res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; @@ -224,7 +224,7 @@ static void __init init_resources(void) if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; } else { res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; -- GitLab From 1b1dfd94b2a520a84732ba4a3be719917b5fb29a Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 6 Dec 2021 11:46:56 +0100 Subject: [PATCH 0352/4335] riscv: Initialize thread pointer before calling C functions commit 35d33c76d68dfacc330a8eb477b51cc647c5a847 upstream. Because of the stack canary feature that reads from the current task structure the stack canary value, the thread pointer register "tp" must be set before calling any C function from head.S: by chance, setup_vm and all the functions that it calls does not seem to be part of the functions where the canary check is done, but in the following commits, some functions will. Fixes: f2c9699f65557a31 ("riscv: Add STACKPROTECTOR supported") Signed-off-by: Alexandre Ghiti Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 52c5ff9804c55..4c3c7592b6fc8 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -301,6 +301,7 @@ clear_bss_done: REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ + la tp, init_task la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp #ifdef CONFIG_BUILTIN_DTB -- GitLab From 0179777e29d4c20db8aba3e56d90c79d089aa6ed Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Apr 2022 22:00:23 -0500 Subject: [PATCH 0353/4335] riscv: Fix irq_work when SMP is disabled commit 2273272823db6f67d57761df8116ae32e7f05bed upstream. irq_work is triggered via an IPI, but the IPI infrastructure is not included in uniprocessor kernels. As a result, irq_work never runs. Fall back to the tick-based irq_work implementation on uniprocessor configurations. Fixes: 298447928bb1 ("riscv: Support irq_work via self IPIs") Signed-off-by: Samuel Holland Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220430030025.58405-1-samuel@sholland.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/irq_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index d6c277992f76a..b53891964ae03 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -4,7 +4,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return true; + return IS_ENABLED(CONFIG_SMP); } extern void arch_irq_work_raise(void); #endif /* _ASM_RISCV_IRQ_WORK_H */ -- GitLab From c64d777c91e013311a690db24de95b7cd0ffebb6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 5 May 2022 10:18:15 +0200 Subject: [PATCH 0354/4335] riscv: Wire up memfd_secret in UAPI header commit 02d88b40cb2e9614e0117c3385afdce878f0d377 upstream. Move the __ARCH_WANT_MEMFD_SECRET define added in commit 7bb7f2ac24a0 ("arch, mm: wire up memfd_secret system call where relevant") to so __NR_memfd_secret is defined when including in userspace. This allows the memfd_secret selftest to pass on riscv. Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20220505081815.22808-1-tklauser@distanz.ch Fixes: 7bb7f2ac24a0 ("arch, mm: wire up memfd_secret system call where relevant") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/unistd.h | 1 - arch/riscv/include/uapi/asm/unistd.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 6c316093a1e59..977ee6181dabf 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -9,7 +9,6 @@ */ #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_MEMFD_SECRET #include diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 8062996c2dfd0..d95fbf5846b0b 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #endif /* __LP64__ */ #define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_MEMFD_SECRET #include -- GitLab From 7b50d7cc2895cdf017a8f686a5c1a587a4c4be98 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 16 May 2022 14:45:21 -0700 Subject: [PATCH 0355/4335] riscv: Move alternative length validation into subsection commit 61114e734ccb804bc12561ab4020745e02c468c2 upstream. After commit 49b290e430d3 ("riscv: prevent compressed instructions in alternatives"), builds with LLVM's integrated assembler fail: In file included from arch/riscv/mm/init.c:10: In file included from ./include/linux/mm.h:29: In file included from ./include/linux/pgtable.h:6: In file included from ./arch/riscv/include/asm/pgtable.h:108: ./arch/riscv/include/asm/tlbflush.h:23:2: error: expected assembly-time absolute expression ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); ^ ./arch/riscv/include/asm/errata_list.h:33:5: note: expanded from macro 'ALT_FLUSH_TLB_PAGE' asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ ^ ./arch/riscv/include/asm/alternative-macros.h:187:2: note: expanded from macro 'ALTERNATIVE' _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k) ^ ./arch/riscv/include/asm/alternative-macros.h:113:2: note: expanded from macro '_ALTERNATIVE_CFG' __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) ^ ./arch/riscv/include/asm/alternative-macros.h:110:2: note: expanded from macro '__ALTERNATIVE_CFG' ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) ^ ./arch/riscv/include/asm/alternative-macros.h:99:3: note: expanded from macro 'ALT_NEW_CONTENT' ".org . - (889b - 888b) + (887b - 886b)\n" \ ^ :26:6: note: instantiated into assembly here .org . - (889b - 888b) + (887b - 886b) ^ This error happens because LLVM's integrated assembler has a one-pass design, which means it cannot figure out the instruction lengths when the .org directive is outside of the subsection that contains the instructions, which was changed by the .option directives added by the above change. Move the .org directives before the .previous directive so that these directives are always within the same subsection, which resolves the failures and does not introduce any new issues with GNU as. This was done for arm64 in commit 966a0acce2fc ("arm64/alternatives: move length validation inside the subsection") and commit 22315a2296f4 ("arm64: alternatives: Move length validation in alternative_{insn, endif}"). While there is no error from the assembly versions of the macro, they appear to have the same problem so just make the same change there as well so that there are no problems in the future. Link: https://github.com/ClangBuiltLinux/linux/issues/1640 Reported-by: kernel test robot Signed-off-by: Nathan Chancellor Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220516214520.3252074-1-nathan@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/alternative-macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 67406c3763890..0377ce0fcc726 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -23,9 +23,9 @@ 888 : \new_c 889 : - .previous .org . - (889b - 888b) + (887b - 886b) .org . - (887b - 886b) + (889b - 888b) + .previous .endif .endm @@ -60,9 +60,9 @@ "888 :\n" \ new_c "\n" \ "889 :\n" \ - ".previous\n" \ ".org . - (887b - 886b) + (889b - 888b)\n" \ ".org . - (889b - 888b) + (887b - 886b)\n" \ + ".previous\n" \ ".endif\n" #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ -- GitLab From 0e06c3b43ddc4b5011d935f8ee68abb7f134a9c2 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 24 May 2022 16:14:04 +0800 Subject: [PATCH 0356/4335] ALSA: hda/realtek - Add new type for ALC245 commit 60571929d06b028800f27b51a7c81de1144944cf upstream. Add new type for ALC245. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/cef26a7cd3d146eb96a3994ce79e34d2@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 040825ea9a089..12726d1febbc5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3126,6 +3126,7 @@ enum { ALC269_TYPE_ALC257, ALC269_TYPE_ALC215, ALC269_TYPE_ALC225, + ALC269_TYPE_ALC245, ALC269_TYPE_ALC287, ALC269_TYPE_ALC294, ALC269_TYPE_ALC300, @@ -3163,6 +3164,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC257: case ALC269_TYPE_ALC215: case ALC269_TYPE_ALC225: + case ALC269_TYPE_ALC245: case ALC269_TYPE_ALC287: case ALC269_TYPE_ALC294: case ALC269_TYPE_ALC300: @@ -3690,7 +3692,8 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; - if (spec->codec_variant != ALC269_TYPE_ALC287) + if (spec->codec_variant != ALC269_TYPE_ALC287 && + spec->codec_variant != ALC269_TYPE_ALC245) /* required only at boot or S3 and S4 resume time */ if (!spec->done_hp_init || is_s3_resume(codec) || @@ -9879,7 +9882,10 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0245: case 0x10ec0285: case 0x10ec0289: - spec->codec_variant = ALC269_TYPE_ALC215; + if (alc_get_coef0(codec) & 0x0010) + spec->codec_variant = ALC269_TYPE_ALC245; + else + spec->codec_variant = ALC269_TYPE_ALC215; spec->shutup = alc225_shutup; spec->init_hook = alc225_init; spec->gen.mixer_nid = 0; -- GitLab From 99137afd8445ee99fca8418aca3b885f9bebebff Mon Sep 17 00:00:00 2001 From: Rik van der Kemp Date: Fri, 27 May 2022 14:07:26 +0200 Subject: [PATCH 0357/4335] ALSA: hda/realtek: Enable 4-speaker output for Dell XPS 15 9520 laptop commit 15dad62f4bdb5dc0f0efde8181d680db9963544c upstream. The 2022-model XPS 15 appears to use the same 4-speakers-on-ALC289 audio setup as the Dell XPS 15 9510, so requires the same quirk to enable woofer output. Tested on my own 9520. [ Move the entry to the right position in the SSID order -- tiwai ] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216035 Cc: Signed-off-by: Rik van der Kemp Link: https://lore.kernel.org/r/181056a137b.d14baf90133058.8425453735588429828@upto11.nl Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 12726d1febbc5..1ff988f2f13f1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8713,6 +8713,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- GitLab From 3e7e75378de55d605b6da2da4110ee7ce7367405 Mon Sep 17 00:00:00 2001 From: Marios Levogiannis Date: Mon, 30 May 2022 10:41:31 +0300 Subject: [PATCH 0358/4335] ALSA: hda/realtek - Fix microphone noise on ASUS TUF B550M-PLUS commit 9bfa7b36343c7d84370bc61c9ed774635b05e4eb upstream. Set microphone pins 0x18 (rear) and 0x19 (front) to VREF_50 to fix the microphone noise on ASUS TUF B550M-PLUS which uses the ALCS1200A codec. The initial value was VREF_80. The same issue is also present on Windows using both the default Windows driver and all tested Realtek drivers before version 6.0.9049.1. Comparing Realtek driver 6.0.9049.1 (the first one without the microphone noise) to Realtek driver 6.0.9047.1 (the last one with the microphone noise) revealed that the fix is the result of setting pins 0x18 and 0x19 to VREF_50. This fix may also work for other boards that have been reported to have the same microphone issue and use the ALC1150 and ALCS1200A codecs, since these codecs are similar and the fix in the Realtek driver on Windows is common for both. However, it is currently enabled only for ASUS TUF B550M-PLUS as this is the only board that could be tested. Signed-off-by: Marios Levogiannis Cc: Link: https://lore.kernel.org/r/20220530074131.12258-1-marios.levogiannis@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1ff988f2f13f1..2b5a610744f61 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1976,6 +1976,7 @@ enum { ALC1220_FIXUP_CLEVO_PB51ED_PINS, ALC887_FIXUP_ASUS_AUDIO, ALC887_FIXUP_ASUS_HMIC, + ALCS1200A_FIXUP_MIC_VREF, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2521,6 +2522,14 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC887_FIXUP_ASUS_AUDIO, }, + [ALCS1200A_FIXUP_MIC_VREF] = { + .type = HDA_FIXUP_PINCTLS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, PIN_VREF50 }, /* rear mic */ + { 0x19, PIN_VREF50 }, /* front mic */ + {} + } + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2558,6 +2567,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), + SND_PCI_QUIRK(0x1043, 0x8797, "ASUS TUF B550M-PLUS", ALCS1200A_FIXUP_MIC_VREF), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), -- GitLab From 11868ca21585561659c2575b0d6508ef8e9c4291 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 May 2022 15:12:03 +0200 Subject: [PATCH 0359/4335] ALSA: usb-audio: Cancel pending work at closing a MIDI substream commit 0125de38122f0f66bf61336158d12a1aabfe6425 upstream. At closing a USB MIDI output substream, there might be still a pending work, which would eventually access the rawmidi runtime object that is being released. For fixing the race, make sure to cancel the pending work at closing. Reported-by: syzbot+6912c9592caca7ca0e7d@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/000000000000e7e75005dfd07cf6@google.com Link: https://lore.kernel.org/r/20220525131203.11299-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 7c6ca2b433a53..344fbeadf161b 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1145,6 +1145,9 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { + struct usbmidi_out_port *port = substream->runtime->private_data; + + cancel_work_sync(&port->ep->work); return substream_open(substream, 0, 0); } -- GitLab From 2514cc471b408d405d5ebd6c1d7e2a93768f61c1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 May 2022 18:17:36 +0200 Subject: [PATCH 0360/4335] USB: serial: pl2303: fix type detection for odd device commit e82e7c6dde91acd6748d672a44dc1980ce239f86 upstream. At least one pl2303 device has a bcdUSB of 1.0.1 which most likely was was intended as 1.1. Allow bcdDevice 1.0.1 but interpret it as 1.1. Fixes: 1e9faef4d26d ("USB: serial: pl2303: fix HX type detection") Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/linux-usb/CAJixRzqf4a9-ZKZDgWxicc_BpfdZVE9qqGmkiO7xEstOXUbGvQ@mail.gmail.com Reported-by: Gary van der Merwe Link: https://lore.kernel.org/r/20220517161736.13313-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 1d878d05a6584..3506c47e1eef0 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -421,6 +421,9 @@ static int pl2303_detect_type(struct usb_serial *serial) bcdUSB = le16_to_cpu(desc->bcdUSB); switch (bcdUSB) { + case 0x101: + /* USB 1.0.1? Let's assume they meant 1.1... */ + fallthrough; case 0x110: switch (bcdDevice) { case 0x300: -- GitLab From c2dd96835fbb8751ece5f524e03733715cf53e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Yin=28=E6=AE=B7=E5=BC=A0=E6=88=90=29?= Date: Thu, 19 May 2022 02:34:43 +0000 Subject: [PATCH 0361/4335] USB: serial: option: add Quectel BG95 modem commit 33b7af2f459df453feb0d44628d820c47fefe7a8 upstream. The BG95 modem has 3 USB configurations that are configurable via the AT command AT+QCFGEXT="usbnet",["ecm"|"modem"|"rmnet"] which make the modem enumerate with the following interfaces, respectively: "modem": Diag + GNSS + Modem + Modem "ecm" : Diag + GNSS + Modem + ECM "rmnet": Diag + GNSS + Modem + QMI Don't support Full QMI messages (e.g WDS_START_NETWORK_INTERFACE) A detailed description of the USB configuration for each mode follows: +QCFGEXT: "usbnet","modem" -------------------------- T: Bus=01 Lev=02 Prnt=02 Port=01 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0700 Rev= 0.00 S: Manufacturer=Quectel, Incorporated S: Product=Quectel LPWA Module S: SerialNumber=884328a2 C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +QCFGEXT: "usbnet","ecm" ------------------------ T: Bus=01 Lev=02 Prnt=02 Port=01 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0700 Rev= 0.00 S: Manufacturer=Quectel, Incorporated S: Product=Quectel LPWA Module S: SerialNumber=884328a2 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 3 IfCount= 2 Cls=02(comm.) Sub=00 Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 4 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 4 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +QCFGEXT: "usbnet","rmnet" -------------------------- T: Bus=01 Lev=02 Prnt=02 Port=01 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0700 Rev= 0.00 S: Manufacturer=Quectel, Incorporated S: Product=Quectel LPWA Module S: SerialNumber=884328a2 C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Carl Yin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 152ad882657d7..e60425bbf5376 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1137,6 +1137,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */ + .driver_info = RSVD(3) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), -- GitLab From fdcb8a893aec5d32dce8ec487216fa24dc9fe760 Mon Sep 17 00:00:00 2001 From: Monish Kumar R Date: Fri, 20 May 2022 18:30:44 +0530 Subject: [PATCH 0362/4335] USB: new quirk for Dell Gen 2 devices commit 97fa5887cf283bb75ffff5f6b2c0e71794c02400 upstream. Add USB_QUIRK_NO_LPM and USB_QUIRK_RESET_RESUME quirks for Dell usb gen 2 device to not fail during enumeration. Found this bug on own testing Signed-off-by: Monish Kumar R Cc: stable Link: https://lore.kernel.org/r/20220520130044.17303-1-monish.kumar.r@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 97b44a68668a5..f99a65a64588f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -510,6 +510,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* DELL USB GEN2 */ + { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, + /* VCOM device */ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, -- GitLab From bf2558bbdce3ab1d6bcba09f354914e4515d0a2b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 16 May 2022 11:14:24 +0200 Subject: [PATCH 0363/4335] usb: isp1760: Fix out-of-bounds array access commit 26ae2c942b5702f2e43d36b2a4389cfb7d616b6a upstream. Running the driver through kasan gives an interesting splat: BUG: KASAN: global-out-of-bounds in isp1760_register+0x180/0x70c Read of size 20 at addr f1db2e64 by task swapper/0/1 (...) isp1760_register from isp1760_plat_probe+0x1d8/0x220 (...) This happens because the loop reading the regmap fields for the different ISP1760 variants look like this: for (i = 0; i < HC_FIELD_MAX; i++) { ... } Meaning it expects the arrays to be at least HC_FIELD_MAX - 1 long. However the arrays isp1760_hc_reg_fields[], isp1763_hc_reg_fields[], isp1763_hc_volatile_ranges[] and isp1763_dc_volatile_ranges[] are dynamically sized during compilation. Fix this by putting an empty assignment to the [HC_FIELD_MAX] and [DC_FIELD_MAX] array member at the end of each array. This will make the array one member longer than it needs to be, but avoids the risk of overwriting whatever is inside [HC_FIELD_MAX - 1] and is simple and intuitive to read. Also add comments explaining what is going on. Fixes: 1da9e1c06873 ("usb: isp1760: move to regmap for register access") Cc: stable@vger.kernel.org Cc: Rui Miguel Silva Cc: Dietmar Eggemann Reviewed-by: Rui Miguel Silva Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220516091424.391209-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/isp1760/isp1760-core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/isp1760/isp1760-core.c b/drivers/usb/isp1760/isp1760-core.c index d1d9a7d5da175..af88f4fe00d27 100644 --- a/drivers/usb/isp1760/isp1760-core.c +++ b/drivers/usb/isp1760/isp1760-core.c @@ -251,6 +251,8 @@ static const struct reg_field isp1760_hc_reg_fields[] = { [HW_DM_PULLDOWN] = REG_FIELD(ISP176x_HC_OTG_CTRL, 2, 2), [HW_DP_PULLDOWN] = REG_FIELD(ISP176x_HC_OTG_CTRL, 1, 1), [HW_DP_PULLUP] = REG_FIELD(ISP176x_HC_OTG_CTRL, 0, 0), + /* Make sure the array is sized properly during compilation */ + [HC_FIELD_MAX] = {}, }; static const struct reg_field isp1763_hc_reg_fields[] = { @@ -321,6 +323,8 @@ static const struct reg_field isp1763_hc_reg_fields[] = { [HW_DM_PULLDOWN_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 2, 2), [HW_DP_PULLDOWN_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 1, 1), [HW_DP_PULLUP_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 0, 0), + /* Make sure the array is sized properly during compilation */ + [HC_FIELD_MAX] = {}, }; static const struct regmap_range isp1763_hc_volatile_ranges[] = { @@ -405,6 +409,8 @@ static const struct reg_field isp1761_dc_reg_fields[] = { [DC_CHIP_ID_HIGH] = REG_FIELD(ISP176x_DC_CHIPID, 16, 31), [DC_CHIP_ID_LOW] = REG_FIELD(ISP176x_DC_CHIPID, 0, 15), [DC_SCRATCH] = REG_FIELD(ISP176x_DC_SCRATCH, 0, 15), + /* Make sure the array is sized properly during compilation */ + [DC_FIELD_MAX] = {}, }; static const struct regmap_range isp1763_dc_volatile_ranges[] = { @@ -458,6 +464,8 @@ static const struct reg_field isp1763_dc_reg_fields[] = { [DC_CHIP_ID_HIGH] = REG_FIELD(ISP1763_DC_CHIPID_HIGH, 0, 15), [DC_CHIP_ID_LOW] = REG_FIELD(ISP1763_DC_CHIPID_LOW, 0, 15), [DC_SCRATCH] = REG_FIELD(ISP1763_DC_SCRATCH, 0, 15), + /* Make sure the array is sized properly during compilation */ + [DC_FIELD_MAX] = {}, }; static const struct regmap_config isp1763_dc_regmap_conf = { -- GitLab From 5a3eec6f67d37406671e54076640a554253262e0 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Wed, 18 May 2022 14:13:15 +0800 Subject: [PATCH 0364/4335] usb: dwc3: gadget: Move null pinter check to proper place commit 3c5880745b4439ac64eccdb040e37fc1cc4c5406 upstream. When dwc3_gadget_ep_cleanup_completed_requests() called to dwc3_gadget_giveback() where the dwc3 lock is released, other thread is able to execute. In this situation, usb_ep_disable() gets the chance to clear endpoint descriptor pointer which leds to the null pointer dereference problem. So needs to move the null pointer check to a proper place. Example call stack: Thread#1: dwc3_thread_interrupt() spin_lock -> dwc3_process_event_buf() -> dwc3_process_event_entry() -> dwc3_endpoint_interrupt() -> dwc3_gadget_endpoint_trbs_complete() -> dwc3_gadget_ep_cleanup_completed_requests() ... -> dwc3_giveback() spin_unlock Thread#2 executes Thread#2: configfs_composite_disconnect() -> __composite_disconnect() -> ffs_func_disable() -> ffs_func_set_alt() -> ffs_func_eps_disable() -> usb_ep_disable() wait for dwc3 spin_lock Thread#1 released lock clear endpoint.desc Fixes: 26288448120b ("usb: dwc3: gadget: Fix null pointer exception") Cc: stable Signed-off-by: Albert Wang Link: https://lore.kernel.org/r/20220518061315.3359198-1-albertccwang@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c32f3116d1a0f..c064ec41bf8c9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3305,14 +3305,14 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, struct dwc3 *dwc = dep->dwc; bool no_started_trb = true; - if (!dep->endpoint.desc) - return no_started_trb; - dwc3_gadget_ep_cleanup_completed_requests(dep, event, status); if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) goto out; + if (!dep->endpoint.desc) + return no_started_trb; + if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->started_list) && (list_empty(&dep->pending_list) || status == -EXDEV)) -- GitLab From 0cebad7ee61a4442cb5b7d4fb7a71e93ff1cb4fb Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 10 May 2022 14:46:29 +0530 Subject: [PATCH 0365/4335] usb: core: hcd: Add support for deferring roothub registration commit a44623d9279086c89f631201d993aa332f7c9e66 upstream. It has been observed with certain PCIe USB cards (like Inateck connected to AM64 EVM or J7200 EVM) that as soon as the primary roothub is registered, port status change is handled even before xHC is running leading to cold plug USB devices not detected. For such cases, registering both the root hubs along with the second HCD is required. Add support for deferring roothub registration in usb_add_hcd(), so that both primary and secondary roothubs are registered along with the second HCD. This patch has been added and reverted earier as it triggered a race in usb device enumeration. That race is now fixed in 5.16-rc3, and in stable back to 5.4 commit 6cca13de26ee ("usb: hub: Fix locking issues with address0_mutex") commit 6ae6dc22d2d1 ("usb: hub: Fix usb enumeration issue due to address0 race") CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Acked-by: Alan Stern Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20220510091630.16564-2-kishon@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 29 +++++++++++++++++++++++------ include/linux/usb/hcd.h | 2 ++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index dd3c288fa952f..9e28d715fa3b7 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2816,6 +2816,7 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; + struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2976,13 +2977,26 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } + /* starting here, usbcore will pay attention to the shared HCD roothub */ + shared_hcd = hcd->shared_hcd; + if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { + retval = register_root_hub(shared_hcd); + if (retval != 0) + goto err_register_root_hub; + + if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) + usb_hcd_poll_rh_status(shared_hcd); + } + /* starting here, usbcore will pay attention to this root hub */ - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + if (!HCD_DEFER_RH_REGISTER(hcd)) { + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); + } return retval; @@ -3020,6 +3034,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; + bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -3030,6 +3045,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); + rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -3039,7 +3055,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + if (rh_registered) + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 548a028f2dabb..2c1fc9212cf28 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,6 +124,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -134,6 +135,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default -- GitLab From 87e9cd4b869219a4dd65155d22c9c17af46c1e18 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Oct 2021 18:48:38 +0300 Subject: [PATCH 0366/4335] fs/ntfs3: Update valid size if -EIOCBQUEUED commit 52e00ea6b26e45fb8159e3b57cdde8d3f9bdd8e9 upstream. Update valid size if write is still in I/O queue. Fixes xfstest generic/240 Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 859951d785cb2..879952254071f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -757,6 +757,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t vbo = iocb->ki_pos; loff_t end; int wr = iov_iter_rw(iter) & WRITE; + size_t iter_count = iov_iter_count(iter); loff_t valid; ssize_t ret; @@ -770,10 +771,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) wr ? ntfs_get_block_direct_IO_W : ntfs_get_block_direct_IO_R); - if (ret <= 0) + if (ret > 0) + end = vbo + ret; + else if (wr && ret == -EIOCBQUEUED) + end = vbo + iter_count; + else goto out; - end = vbo + ret; valid = ni->i_valid; if (wr) { if (end > valid && !S_ISBLK(inode->i_mode)) { -- GitLab From 3eb42b847e43aba3066a0375c40cc7c5587fc375 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Oct 2021 18:31:28 +0300 Subject: [PATCH 0367/4335] fs/ntfs3: Fix fiemap + fix shrink file size (to remove preallocated space) commit 3880f2b816a7e4ca889b7e8a42e6c62c5706ed36 upstream. Two problems: 1. ntfs3_setattr can't truncate preallocated space; 2. if allocated fragment "cross" valid size, then fragment splits into two parts: - normal part; - unwritten part (here we must return FIEMAP_EXTENT_LAST). Before this commit we returned FIEMAP_EXTENT_LAST for whole fragment. Fixes xfstest generic/092 Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/file.c | 2 +- fs/ntfs3/frecord.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 54b9599640ef4..35ed601aef81e 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -761,7 +761,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } inode_dio_wait(inode); - if (attr->ia_size < oldsize) + if (attr->ia_size <= oldsize) err = ntfs_truncate(inode, attr->ia_size); else if (attr->ia_size > oldsize) err = ntfs_extend(inode, attr->ia_size, 0, NULL); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 6f47a9c17f896..18842998c8fa3 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -1964,10 +1964,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, vcn += clen; - if (vbo + bytes >= end) { + if (vbo + bytes >= end) bytes = end - vbo; - flags |= FIEMAP_EXTENT_LAST; - } if (vbo + bytes <= valid) { ; @@ -1977,6 +1975,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, /* vbo < valid && valid < vbo + bytes */ u64 dlen = valid - vbo; + if (vbo + dlen >= end) + flags |= FIEMAP_EXTENT_LAST; + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, flags); if (err < 0) @@ -1995,6 +1996,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, flags |= FIEMAP_EXTENT_UNWRITTEN; } + if (vbo + bytes >= end) + flags |= FIEMAP_EXTENT_LAST; + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); if (err < 0) break; -- GitLab From dc05aa14d311aa8fd0d265d042b86dd277a411f7 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Oct 2021 17:37:52 +0300 Subject: [PATCH 0368/4335] fs/ntfs3: Keep preallocated only if option prealloc enabled commit e95113ed4d428219e3395044e29f5713fc446720 upstream. If size of file was reduced, we still kept allocated blocks. This commit makes ntfs3 work as other fs like btrfs. Link: https://bugzilla.kernel.org/show_bug.cgi?id=214719 Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Reported-by: Ganapathi Kamath Tested-by: Ganapathi Kamath Reviewed-by: Kari Argillander Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 35ed601aef81e..d7128be64cd61 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -494,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &new_valid, true, NULL); + &new_valid, ni->mi.sbi->options->prealloc, NULL); up_write(&ni->file.run_lock); if (new_valid < ni->i_valid) -- GitLab From f45bd2922d66b8195c7c22f19d918eb353cc05ef Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Oct 2021 18:34:06 +0300 Subject: [PATCH 0369/4335] fs/ntfs3: Check new size for limits commit 114346978cf61de02832cc3cc68432a3de70fb38 upstream. We must check size before trying to allocate. Size can be set for example by "ulimit -f". Fixes xfstest generic/228 Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Reviewed-by: Kari Argillander Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index d7128be64cd61..7a678a5b1ca5f 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -661,7 +661,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) /* * Normal file: Allocate clusters, do not change 'valid' size. */ - err = ntfs_set_size(inode, max(end, i_size)); + loff_t new_size = max(end, i_size); + + err = inode_newsize_ok(inode, new_size); + if (err) + goto out; + + err = ntfs_set_size(inode, new_size); if (err) goto out; -- GitLab From 8bac05d61f26c04c79b8f4c7f37867e2884dfd86 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Wed, 24 Nov 2021 15:08:19 +0300 Subject: [PATCH 0370/4335] fs/ntfs3: In function ntfs_set_acl_ex do not change inode->i_mode if called from function ntfs_init_acl commit 9186d472ee780fabf74424756c4c00545166157e upstream. ntfs_init_acl sets mode. ntfs_init_acl calls ntfs_set_acl_ex. ntfs_set_acl_ex must not change this mode. Fixes xfstest generic/444 Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Reviewed-by: Joe Perches Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/xattr.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index afd0ddad826ff..480e737436f9e 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -541,7 +541,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, - int type) + int type, bool init_acl) { const char *name; size_t size, name_len; @@ -554,8 +554,9 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, switch (type) { case ACL_TYPE_ACCESS: - if (acl) { - umode_t mode = inode->i_mode; + /* Do not change i_mode if we are in init_acl */ + if (acl && !init_acl) { + umode_t mode; err = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); @@ -616,7 +617,7 @@ out: int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type) { - return ntfs_set_acl_ex(mnt_userns, inode, acl, type); + return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); } /* @@ -636,7 +637,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, if (default_acl) { err = ntfs_set_acl_ex(mnt_userns, inode, default_acl, - ACL_TYPE_DEFAULT); + ACL_TYPE_DEFAULT, true); posix_acl_release(default_acl); } else { inode->i_default_acl = NULL; @@ -647,7 +648,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, else { if (!err) err = ntfs_set_acl_ex(mnt_userns, inode, acl, - ACL_TYPE_ACCESS); + ACL_TYPE_ACCESS, true); posix_acl_release(acl); } -- GitLab From d8be98ab88250dc12a98efdb703792a537b0eac3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 11 Nov 2021 08:45:44 +0100 Subject: [PATCH 0371/4335] fs/ntfs3: Fix some memory leaks in an error handling path of 'log_replay()' commit e589f9b7078e1c0191613cd736f598e81d2390de upstream. All error handling paths lead to 'out' where many resources are freed. Do it as well here instead of a direct return, otherwise 'log', 'ra' and 'log->one_page_buf' (at least) will leak. Fixes: b46acd6a6a62 ("fs/ntfs3: Add NTFS journal") Signed-off-by: Christophe JAILLET Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/fslog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 06492f088d602..915f42cf07bcf 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -4085,8 +4085,10 @@ process_log: if (client == LFS_NO_CLIENT_LE) { /* Insert "NTFS" client LogFile. */ client = ra->client_idx[0]; - if (client == LFS_NO_CLIENT_LE) - return -EINVAL; + if (client == LFS_NO_CLIENT_LE) { + err = -EINVAL; + goto out; + } t16 = le16_to_cpu(client); cr = ca + t16; -- GitLab From 79f44f05e7562707eab6a8d7d2b685f5ad5d5df4 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Oct 2021 18:35:43 +0300 Subject: [PATCH 0372/4335] fs/ntfs3: Update i_ctime when xattr is added commit 2d44667c306e7806848a3478820f87343feb5421 upstream. Ctime wasn't updated after setfacl command. This commit fixes xfstest generic/307 Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 480e737436f9e..48cfc42db4c75 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -902,6 +902,9 @@ set_new_fa: err = ntfs_set_ea(inode, name, name_len, value, size, flags); out: + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + return err; } -- GitLab From d7b5577272c21614c57bb894211a232107868c98 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Oct 2021 18:15:36 +0300 Subject: [PATCH 0373/4335] fs/ntfs3: Restore ntfs_xattr_get_acl and ntfs_xattr_set_acl functions commit 87e21c99bad763524c953ff4d1a61ee19038ddc2 upstream. Apparently we need to maintain these functions with ntfs_get_acl_ex and ntfs_set_acl_ex. This commit fixes xfstest generic/099 Fixes: 95dd8b2c1ed0 ("fs/ntfs3: Remove unnecessary functions") Reviewed-by: Kari Argillander Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/xattr.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 48cfc42db4c75..0968565ff2ca0 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -112,7 +112,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, return -ENOMEM; if (!size) { - ; + /* EA info persists, but xattr is empty. Looks like EA problem. */ } else if (attr_ea->non_res) { struct runs_tree run; @@ -620,6 +620,67 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); } +static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, void *buffer, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) { + ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); + return -EOPNOTSUPP; + } + + acl = ntfs_get_acl(inode, type, false); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (!acl) + return -ENODATA; + + err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); + posix_acl_release(acl); + + return err; +} + +static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, const void *value, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) { + ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); + return -EOPNOTSUPP; + } + + if (!inode_owner_or_capable(mnt_userns, inode)) + return -EPERM; + + if (!value) { + acl = NULL; + } else { + acl = posix_acl_from_xattr(mnt_userns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + err = posix_acl_valid(mnt_userns, acl); + if (err) + goto release_and_out; + } + } + + err = ntfs_set_acl(mnt_userns, inode, acl, type); + +release_and_out: + posix_acl_release(acl); + return err; +} + /* * ntfs_init_acl - Initialize the ACLs of a new inode. * @@ -786,6 +847,23 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, goto out; } +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + /* TODO: init_user_ns? */ + err = ntfs_xattr_get_acl( + &init_user_ns, inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + buffer, size); + goto out; + } +#endif /* Deal with NTFS extended attribute. */ err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL); @@ -898,6 +976,22 @@ set_new_fa: goto out; } +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + err = ntfs_xattr_set_acl( + mnt_userns, inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + value, size); + goto out; + } +#endif /* Deal with NTFS extended attribute. */ err = ntfs_set_ea(inode, name, name_len, value, size, flags); -- GitLab From ce0008a0e410cdd95f0d8cd81b2902ec10a660c4 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 31 May 2022 13:01:17 +1000 Subject: [PATCH 0374/4335] cifs: fix potential double free during failed mount commit 8378a51e3f8140f60901fb27208cc7a6e47047b5 upstream. RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=2088799 Cc: stable@vger.kernel.org Signed-off-by: Roberto Bergantinos Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8f8d281e31510..1e4f5ee9768ab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -826,7 +826,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, int flags, struct smb3_fs_context *old_ctx) { int rc; - struct super_block *sb; + struct super_block *sb = NULL; struct cifs_sb_info *cifs_sb = NULL; struct cifs_mnt_data mnt_data; struct dentry *root; @@ -922,9 +922,11 @@ out_super: return root; out: if (cifs_sb) { - kfree(cifs_sb->prepath); - smb3_cleanup_fs_context(cifs_sb->ctx); - kfree(cifs_sb); + if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */ + kfree(cifs_sb->prepath); + smb3_cleanup_fs_context(cifs_sb->ctx); + kfree(cifs_sb); + } } return root; } -- GitLab From 791f532de70dae10ae037b8c0df31d355fc27952 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 1 Jun 2022 08:48:38 +1000 Subject: [PATCH 0375/4335] cifs: when extending a file with falloc we should make files not-sparse commit f66f8b94e7f2f4ac9fffe710be231ca8f25c5057 upstream. as this is the only way to make sure the region is allocated. Fix the conditional that was wrong and only tried to make already non-sparse files non-sparse. Cc: stable@vger.kernel.org Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0c1af2dd9069e..df9ba3729d1f8 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3791,7 +3791,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, if (rc) goto out; - if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) smb2_set_sparse(xid, tcon, cfile, inode, false); eof = cpu_to_le64(off + len); -- GitLab From dfe5921112cf5c7be730c88d0d46a6c8cfd13e0c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 12 May 2022 01:04:50 +0300 Subject: [PATCH 0376/4335] xhci: Allow host runtime PM as default for Intel Alder Lake N xHCI commit 74f55a62c4c354f43a6d75f77dd184c4f57b9a26 upstream. Alder Lake N TCSS xHCI needs to be runtime suspended whenever possible to allow the TCSS hardware block to enter D3 and thus save energy Cc: stable@kernel.org Suggested-by: Gopal Vamshi Krishna Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220511220450.85367-10-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cb8b481a94990..7d13ddff6b33f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 @@ -268,6 +269,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; -- GitLab From d9a04bc7d1b836f43bed8057fa09c7bbae4a4108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Tue, 17 May 2022 20:31:30 +0200 Subject: [PATCH 0377/4335] platform/x86: intel-hid: fix _DSM function index handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1620c80bba53af8c547bab34a1d3bc58319fe608 upstream. intel_hid_dsm_fn_mask is a bit mask containing one bit for each function index. Fix the function index check in intel_hid_evaluate_method accordingly, which was missed in commit 97ab4516205e ("platform/x86: intel-hid: fix _DSM function index handling"). Fixes: 97ab4516205e ("platform/x86: intel-hid: fix _DSM function index handling") Cc: stable@vger.kernel.org Signed-off-by: Michael Niewöhner Link: https://lore.kernel.org/r/66f813f5bcc724a0f6dd5adefe6a9728dbe509e3.camel@mniewoehner.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 41a2a026f1568..e9e8554147e00 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -245,7 +245,7 @@ static bool intel_hid_evaluate_method(acpi_handle handle, method_name = (char *)intel_hid_dsm_fn_to_method[fn_index]; - if (!(intel_hid_dsm_fn_mask & fn_index)) + if (!(intel_hid_dsm_fn_mask & BIT(fn_index))) goto skip_dsm_eval; obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid, -- GitLab From cc0dd4456f9573bf8af9b4d8754433918e809e1e Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 29 Mar 2022 17:47:05 +0700 Subject: [PATCH 0378/4335] x86/MCE/AMD: Fix memory leak when threshold_create_bank() fails commit e5f28623ceb103e13fc3d7bd45edf9818b227fd0 upstream. In mce_threshold_create_device(), if threshold_create_bank() fails, the previously allocated threshold banks array @bp will be leaked because the call to mce_threshold_remove_device() will not free it. This happens because mce_threshold_remove_device() fetches the pointer through the threshold_banks per-CPU variable but bp is written there only after the bank creation is successful, and not before, when threshold_create_bank() fails. Add a helper which unwinds all the bank creation work previously done and pass into it the previously allocated threshold banks array for freeing. [ bp: Massage. ] Fixes: 6458de97fc15 ("x86/mce/amd: Straighten CPU hotplug path") Co-developed-by: Alviro Iskandar Setiawan Signed-off-by: Alviro Iskandar Setiawan Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Ammar Faizi Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220329104705.65256-3-ammarfaizi2@gnuweeb.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mce/amd.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index c0c57bd05f02d..a873577e49dcc 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1470,10 +1470,23 @@ out_free: kfree(bank); } +static void __threshold_remove_device(struct threshold_bank **bp) +{ + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); + + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) + continue; + + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; + } + kfree(bp); +} + int mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, numbanks = this_cpu_read(mce_num_banks); if (!bp) return 0; @@ -1484,13 +1497,7 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); - for (bank = 0; bank < numbanks; bank++) { - if (bp[bank]) { - threshold_remove_bank(bp[bank]); - bp[bank] = NULL; - } - } - kfree(bp); + __threshold_remove_device(bp); return 0; } @@ -1527,15 +1534,14 @@ int mce_threshold_create_device(unsigned int cpu) if (!(this_cpu_read(bank_map) & (1 << bank))) continue; err = threshold_create_bank(bp, cpu, bank); - if (err) - goto out_err; + if (err) { + __threshold_remove_device(bp); + return err; + } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; return 0; -out_err: - mce_threshold_remove_device(cpu); - return err; } -- GitLab From 7aef4ecc315084e2cab54a307b9b40b380093498 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 May 2022 06:39:52 -0700 Subject: [PATCH 0379/4335] perf/x86/intel: Fix event constraints for ICL commit 86dca369075b3e310c3c0adb0f81e513c562b5e4 upstream. According to the latest event list, the event encoding 0x55 INST_DECODED.DECODERS and 0x56 UOPS_DECODED.DEC0 are only available on the first 4 counters. Add them into the event constraints table. Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220525133952.1660658-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 265cb203d9d5c..588b83cc730d3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -255,7 +255,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ -- GitLab From 8765a423a87d74ef24ea02b43b2728fe4039f248 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 23 Feb 2022 19:32:24 +0800 Subject: [PATCH 0380/4335] x86/kexec: fix memory leak of elf header buffer commit b3e34a47f98974d0844444c5121aaff123004e57 upstream. This is reported by kmemleak detector: unreferenced object 0xffffc900002a9000 (size 4096): comm "kexec", pid 14950, jiffies 4295110793 (age 373.951s) hex dump (first 32 bytes): 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 04 00 3e 00 01 00 00 00 00 00 00 00 00 00 00 00 ..>............. backtrace: [<0000000016a8ef9f>] __vmalloc_node_range+0x101/0x170 [<000000002b66b6c0>] __vmalloc_node+0xb4/0x160 [<00000000ad40107d>] crash_prepare_elf64_headers+0x8e/0xcd0 [<0000000019afff23>] crash_load_segments+0x260/0x470 [<0000000019ebe95c>] bzImage64_load+0x814/0xad0 [<0000000093e16b05>] arch_kexec_kernel_image_load+0x1be/0x2a0 [<000000009ef2fc88>] kimage_file_alloc_init+0x2ec/0x5a0 [<0000000038f5a97a>] __do_sys_kexec_file_load+0x28d/0x530 [<0000000087c19992>] do_syscall_64+0x3b/0x90 [<0000000066e063a4>] entry_SYSCALL_64_after_hwframe+0x44/0xae In crash_prepare_elf64_headers(), a buffer is allocated via vmalloc() to store elf headers. While it's not freed back to system correctly when kdump kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing x86 specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. And also remove the incorrect elf header buffer freeing code. Before calling arch specific kexec_file loading function, the image instance has been initialized. So 'image->elf_headers' must be NULL. It doesn't make sense to free the elf header buffer in the place. Three different people have reported three bugs about the memory leak on x86_64 inside Redhat. Link: https://lkml.kernel.org/r/20220223113225.63106-2-bhe@redhat.com Signed-off-by: Baoquan He Acked-by: Dave Young Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/machine_kexec_64.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 131f30fdcfbdc..dc8b17568784f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -373,9 +373,6 @@ void machine_kexec(struct kimage *image) #ifdef CONFIG_KEXEC_FILE void *arch_kexec_kernel_image_load(struct kimage *image) { - vfree(image->elf_headers); - image->elf_headers = NULL; - if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); @@ -511,6 +508,15 @@ overflow: (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} #endif /* CONFIG_KEXEC_FILE */ static int -- GitLab From 260650ddc864e73cacc5f2f610935d76beb4019f Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 20 May 2022 10:42:47 -0700 Subject: [PATCH 0381/4335] x86/sgx: Set active memcg prior to shmem allocation commit 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 upstream. When the system runs out of enclave memory, SGX can reclaim EPC pages by swapping to normal RAM. These backing pages are allocated via a per-enclave shared memory area. Since SGX allows unlimited over commit on EPC memory, the reclaimer thread can allocate a large number of backing RAM pages in response to EPC memory pressure. When the shared memory backing RAM allocation occurs during the reclaimer thread context, the shared memory is charged to the root memory control group, and the shmem usage of the enclave is not properly accounted for, making cgroups ineffective at limiting the amount of RAM an enclave can consume. For example, when using a cgroup to launch a set of test enclaves, the kernel does not properly account for 50% - 75% of shmem page allocations on average. In the worst case, when nearly all allocations occur during the reclaimer thread, the kernel accounts less than a percent of the amount of shmem used by the enclave's cgroup to the correct cgroup. SGX stores a list of mm_structs that are associated with an enclave. Pick one of them during reclaim and charge that mm's memcg with the shmem allocation. The one that gets picked is arbitrary, but this list almost always only has one mm. The cases where there is more than one mm with different memcg's are not worth considering. Create a new function - sgx_encl_alloc_backing(). This function is used whenever a new backing storage page needs to be allocated. Previously the same function was used for page allocation as well as retrieving a previously allocated page. Prior to backing page allocation, if there is a mm_struct associated with the enclave that is requesting the allocation, it is set as the active memory control group. [ dhansen: - fix merge conflict with ELDU fixes - check against actual ksgxd_tsk, not ->mm ] Cc: stable@vger.kernel.org Signed-off-by: Kristen Carlson Accardi Signed-off-by: Dave Hansen Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/sgx/encl.c | 105 ++++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/sgx/encl.h | 7 ++- arch/x86/kernel/cpu/sgx/main.c | 9 ++- 3 files changed, 115 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 3c24e6124d955..19876ebfb5044 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -152,7 +152,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); - ret = sgx_encl_get_backing(encl, page_index, &b); + ret = sgx_encl_lookup_backing(encl, page_index, &b); if (ret) return ret; @@ -718,7 +718,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, * 0 on success, * -errno otherwise. */ -int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); @@ -743,6 +743,107 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, return 0; } +/* + * When called from ksgxd, returns the mem_cgroup of a struct mm stored + * in the enclave's mm_list. When not called from ksgxd, just returns + * the mem_cgroup of the current task. + */ +static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl) +{ + struct mem_cgroup *memcg = NULL; + struct sgx_encl_mm *encl_mm; + int idx; + + /* + * If called from normal task context, return the mem_cgroup + * of the current task's mm. The remainder of the handling is for + * ksgxd. + */ + if (!current_is_ksgxd()) + return get_mem_cgroup_from_mm(current->mm); + + /* + * Search the enclave's mm_list to find an mm associated with + * this enclave to charge the allocation to. + */ + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + memcg = get_mem_cgroup_from_mm(encl_mm->mm); + + mmput_async(encl_mm->mm); + + break; + } + + srcu_read_unlock(&encl->srcu, idx); + + /* + * In the rare case that there isn't an mm associated with + * the enclave, set memcg to the current active mem_cgroup. + * This will be the root mem_cgroup if there is no active + * mem_cgroup. + */ + if (!memcg) + return get_mem_cgroup_from_mm(NULL); + + return memcg; +} + +/** + * sgx_encl_alloc_backing() - allocate a new backing storage page + * @encl: an enclave pointer + * @page_index: enclave page index + * @backing: data for accessing backing storage for the page + * + * When called from ksgxd, sets the active memcg from one of the + * mms in the enclave's mm_list prior to any backing page allocation, + * in order to ensure that shmem page allocations are charged to the + * enclave. + * + * Return: + * 0 on success, + * -errno otherwise. + */ +int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) +{ + struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl); + struct mem_cgroup *memcg = set_active_memcg(encl_memcg); + int ret; + + ret = sgx_encl_get_backing(encl, page_index, backing); + + set_active_memcg(memcg); + mem_cgroup_put(encl_memcg); + + return ret; +} + +/** + * sgx_encl_lookup_backing() - retrieve an existing backing storage page + * @encl: an enclave pointer + * @page_index: enclave page index + * @backing: data for accessing backing storage for the page + * + * Retrieve a backing page for loading data back into an EPC page with ELDU. + * It is the caller's responsibility to ensure that it is appropriate to use + * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is + * not used correctly, this will cause an allocation which is not accounted for. + * + * Return: + * 0 on success, + * -errno otherwise. + */ +int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) +{ + return sgx_encl_get_backing(encl, page_index, backing); +} + /** * sgx_encl_put_backing() - Unpin the backing storage * @backing: data for accessing backing storage for the page diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index d44e7372151f0..332ef3568267e 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -103,10 +103,13 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr, int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, unsigned long end, unsigned long vm_flags); +bool current_is_ksgxd(void); void sgx_encl_release(struct kref *ref); int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); -int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, - struct sgx_backing *backing); +int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); +int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); void sgx_encl_put_backing(struct sgx_backing *backing); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 00e09a2b933ac..c93c9f9f8c7b9 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -292,7 +292,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_put_backing(backing); if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { - ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), + ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size), &secs_backing); if (ret) goto out; @@ -365,7 +365,7 @@ static void sgx_reclaim_pages(void) page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); mutex_lock(&encl_page->encl->lock); - ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); + ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]); if (ret) { mutex_unlock(&encl_page->encl->lock); goto skip; @@ -462,6 +462,11 @@ static bool __init sgx_page_reclaimer_init(void) return true; } +bool current_is_ksgxd(void) +{ + return current == ksgxd_tsk; +} + static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) { struct sgx_numa_node *node = &sgx_numa_nodes[nid]; -- GitLab From e1c91672c5c835c849e329a29b2a74f8540cd102 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Apr 2022 16:30:17 -0500 Subject: [PATCH 0382/4335] ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP commit c200e4bb44e80b343c09841e7caaaca0aac5e5fa upstream. User mode linux is the last user of the PT_DTRACE flag. Using the flag to indicate single stepping is a little confusing and worse changing tsk->ptrace without locking could potentionally cause problems. So use a thread info flag with a better name instead of flag in tsk->ptrace. Remove the definition PT_DTRACE as uml is the last user. Cc: stable@vger.kernel.org Acked-by: Johannes Berg Tested-by: Kees Cook Reviewed-by: Oleg Nesterov Link: https://lkml.kernel.org/r/20220505182645.497868-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/um/include/asm/thread_info.h | 2 ++ arch/um/kernel/exec.c | 2 +- arch/um/kernel/process.c | 2 +- arch/um/kernel/ptrace.c | 8 ++++---- arch/um/kernel/signal.c | 4 ++-- include/linux/ptrace.h | 1 - 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 3b1cb8b3b1864..e610e932cfe1e 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -64,6 +64,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -72,5 +73,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 4d84981003419..335dcb2d63e78 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -42,7 +42,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 457a38db368b7..b3fbfca494006 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -339,7 +339,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index b425f47bddbb3..d37802ced5636 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -22,7 +22,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 88cd9b5c1b744..ae4658f576ab7 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b5ebf6c012924..99610cc363d11 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 -#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ -- GitLab From 7400a7e0a7ae1f1cd22250f0bfa1453f59a1d117 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Apr 2022 16:45:37 -0500 Subject: [PATCH 0383/4335] ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP commit 4a3d2717d140401df7501a95e454180831a0c5af upstream. xtensa is the last user of the PT_SINGLESTEP flag. Changing tsk->ptrace in user_enable_single_step and user_disable_single_step without locking could potentiallly cause problems. So use a thread info flag instead of a flag in tsk->ptrace. Use TIF_SINGLESTEP that xtensa already had defined but unused. Remove the definitions of PT_SINGLESTEP and PT_BLOCKSTEP as they have no more users. Cc: stable@vger.kernel.org Acked-by: Max Filippov Tested-by: Kees Cook Reviewed-by: Oleg Nesterov Link: https://lkml.kernel.org/r/20220505182645.497868-4-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/ptrace.c | 4 ++-- arch/xtensa/kernel/signal.c | 4 ++-- include/linux/ptrace.h | 6 ------ 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index bb3f4797d212b..db6cdea471d83 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -226,12 +226,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_SINGLESTEP; + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } /* diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c4d77dbfb61af..f2b00f43cf236 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *regs) /* Set up the stack frame */ ret = setup_frame(&ksig, sigmask_to_save(), regs); signal_setup_done(ret, &ksig, 0); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; @@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *regs) /* If there's no signal to deliver, we just restore the saved mask. */ restore_saved_sigmask(); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; } diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 99610cc363d11..d695c43fd740d 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -46,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) -/* single stepping state bits (used on ARM and PA-RISC) */ -#define PT_SINGLESTEP_BIT 31 -#define PT_SINGLESTEP (1< Date: Fri, 29 Apr 2022 09:23:55 -0500 Subject: [PATCH 0384/4335] ptrace: Reimplement PTRACE_KILL by always sending SIGKILL commit 6a2d90ba027adba528509ffa27097cffd3879257 upstream. The current implementation of PTRACE_KILL is buggy and has been for many years as it assumes it's target has stopped in ptrace_stop. At a quick skim it looks like this assumption has existed since ptrace support was added in linux v1.0. While PTRACE_KILL has been deprecated we can not remove it as a quick search with google code search reveals many existing programs calling it. When the ptracee is not stopped at ptrace_stop some fields would be set that are ignored except in ptrace_stop. Making the userspace visible behavior of PTRACE_KILL a noop in those case. As the usual rules are not obeyed it is not clear what the consequences are of calling PTRACE_KILL on a running process. Presumably userspace does not do this as it achieves nothing. Replace the implementation of PTRACE_KILL with a simple send_sig_info(SIGKILL) followed by a return 0. This changes the observable user space behavior only in that PTRACE_KILL on a process not stopped in ptrace_stop will also kill it. As that has always been the intent of the code this seems like a reasonable change. Cc: stable@vger.kernel.org Reported-by: Al Viro Suggested-by: Al Viro Tested-by: Kees Cook Reviewed-by: Oleg Nesterov Link: https://lkml.kernel.org/r/20220505182645.497868-7-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/step.c | 3 +-- kernel/ptrace.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0f3c307b37b3a..8e2b2552b5eea 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -180,8 +180,7 @@ void set_task_blockstep(struct task_struct *task, bool on) * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but - * PTRACE_KILL is not safe. + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). */ local_irq_disable(); debugctl = get_debugctlmsr(); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 516ad5e65849f..0cf547531ddf0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1238,9 +1238,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: -- GitLab From 018110b5daa8ed3a3c89e4ad9a7cddbf7e5057e7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 10 May 2022 15:10:18 +0800 Subject: [PATCH 0385/4335] btrfs: add "0x" prefix for unsupported optional features commit d5321a0fa8bc49f11bea0b470800962c17d92d8f upstream. The following error message lack the "0x" obviously: cannot mount because of unsupported optional features (4000) Add the prefix to make it less confusing. This can happen on older kernels that try to mount a filesystem with newer features so it makes sense to backport to older trees. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8cbed2f08d1bd..9ab2792055271 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3370,7 +3370,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { btrfs_err(fs_info, - "cannot mount because of unsupported optional features (%llx)", + "cannot mount because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_alloc; @@ -3408,7 +3408,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, - "cannot mount read-write because of unsupported optional features (%llx)", + "cannot mount read-write because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_alloc; -- GitLab From b17dada226ab19b8c9cdb9b96838387ba8a77d42 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Apr 2022 20:30:15 +0800 Subject: [PATCH 0386/4335] btrfs: return correct error number for __extent_writepage_io() commit 44e5801fada6925d2bba1987c7b59cbcc9d0d592 upstream. [BUG] If we hit an error from submit_extent_page() inside __extent_writepage_io(), we could still return 0 to the caller, and even trigger the warning in btrfs_page_assert_not_dirty(). [CAUSE] In __extent_writepage_io(), if we hit an error from submit_extent_page(), we will just clean up the range and continue. This is completely fine for regular PAGE_SIZE == sectorsize, as we can only hit one sector in one page, thus after the error we're ensured to exit and @ret will be saved. But for subpage case, we may have other dirty subpage range in the page, and in the next loop, we may succeeded submitting the next range. In that case, @ret will be overwritten, and we return 0 to the caller, while we have hit some error. [FIX] Introduce @has_error and @saved_ret to record the first error we hit, so we will never forget what error we hit. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96aeb16bd65c9..3895fce3674ee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3902,10 +3902,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, u64 extent_offset; u64 block_start; struct extent_map *em; + int saved_ret = 0; int ret = 0; int nr = 0; u32 opf = REQ_OP_WRITE; const unsigned int write_flags = wbc_to_write_flags(wbc); + bool has_error = false; bool compressed; ret = btrfs_writepage_cow_fixup(page); @@ -3956,6 +3958,9 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, if (IS_ERR_OR_NULL(em)) { btrfs_page_set_error(fs_info, page, cur, end - cur + 1); ret = PTR_ERR_OR_ZERO(em); + has_error = true; + if (!saved_ret) + saved_ret = ret; break; } @@ -4019,6 +4024,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, end_bio_extent_writepage, 0, 0, false); if (ret) { + has_error = true; + if (!saved_ret) + saved_ret = ret; + btrfs_page_set_error(fs_info, page, cur, iosize); if (PageWriteback(page)) btrfs_page_clear_writeback(fs_info, page, cur, @@ -4032,8 +4041,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, * If we finish without problem, we should not only clear page dirty, * but also empty subpage dirty bits */ - if (!ret) + if (!has_error) btrfs_page_assert_not_dirty(fs_info, page); + else + ret = saved_ret; *nr_ret = nr; return ret; } -- GitLab From 80e2340d1f8071e3cd6044dfc14a987dac26d1cd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 28 Feb 2022 15:05:53 +0800 Subject: [PATCH 0387/4335] btrfs: repair super block num_devices automatically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d201238ccd2f30b9bfcfadaeae0972e3a486a176 upstream. [BUG] There is a report that a btrfs has a bad super block num devices. This makes btrfs to reject the fs completely. BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here BTRFS error (device sdd3): failed to read chunk tree: -22 BTRFS error (device sdd3): open_ctree failed [CAUSE] During btrfs device removal, chunk tree and super block num devs are updated in two different transactions: btrfs_rm_device() |- btrfs_rm_dev_item(device) | |- trans = btrfs_start_transaction() | | Now we got transaction X | | | |- btrfs_del_item() | | Now device item is removed from chunk tree | | | |- btrfs_commit_transaction() | Transaction X got committed, super num devs untouched, | but device item removed from chunk tree. | (AKA, super num devs is already incorrect) | |- cur_devices->num_devices--; |- cur_devices->total_devices--; |- btrfs_set_super_num_devices() All those operations are not in transaction X, thus it will only be written back to disk in next transaction. So after the transaction X in btrfs_rm_dev_item() committed, but before transaction X+1 (which can be minutes away), a power loss happen, then we got the super num mismatch. This has been fixed by commit bbac58698a55 ("btrfs: remove device item and update super block in the same transaction"). [FIX] Make the super_num_devices check less strict, converting it from a hard error to a warning, and reset the value to a correct one for the current or next transaction commit. As the number of device items is the critical information where the super block num_devices is only a cached value (and also useful for cross checking), it's safe to automatically update it. Other device related problems like missing device are handled after that and may require other means to resolve, like degraded mount. With this fix, potentially affected filesystems won't fail mount and require the manual repair by btrfs check. Reported-by: Luca Béla Palkovics Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 471cc4706a074..378e03a93e101 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7596,12 +7596,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * do another round of validation checks. */ if (total_dev != fs_info->fs_devices->total_devices) { - btrfs_err(fs_info, - "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_warn(fs_info, +"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit", btrfs_super_num_devices(fs_info->super_copy), total_dev); - ret = -EINVAL; - goto error; + fs_info->fs_devices->total_devices = total_dev; + btrfs_set_super_num_devices(fs_info->super_copy, total_dev); } if (btrfs_super_total_bytes(fs_info->super_copy) < fs_info->fs_devices->total_rw_bytes) { -- GitLab From 08128d6cac4e3eff37e22e75674b40baa0053898 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Apr 2022 20:30:14 +0800 Subject: [PATCH 0388/4335] btrfs: fix the error handling for submit_extent_page() for btrfs_do_readpage() commit 10f7f6f879c28f8368d6516ab1ccf3517a1f5d3d upstream. [BUG] Test case generic/475 have a very high chance (almost 100%) to hit a fs hang, where a data page will never be unlocked and hang all later operations. [CAUSE] In btrfs_do_readpage(), if we hit an error from submit_extent_page() we will try to do the cleanup for our current io range, and exit. This works fine for PAGE_SIZE == sectorsize cases, but not for subpage. For subpage btrfs_do_readpage() will lock the full page first, which can contain several different sectors and extents: btrfs_do_readpage() |- begin_page_read() | |- btrfs_subpage_start_reader(); | Now the page will have PAGE_SIZE / sectorsize reader pending, | and the page is locked. | |- end_page_read() for different branches | This function will reduce subpage readers, and when readers | reach 0, it will unlock the page. But when submit_extent_page() failed, we only cleanup the current io range, while the remaining io range will never be cleaned up, and the page remains locked forever. [FIX] Update the error handling of submit_extent_page() to cleanup all the remaining subpage range before exiting the loop. Please note that, now submit_extent_page() can only fail due to sanity check in alloc_new_bio(). Thus regular IO errors are impossible to trigger the error path. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3895fce3674ee..6dd375ed6e3d5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3727,8 +3727,12 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, this_bio_flag, force_bio_submit); if (ret) { - unlock_extent(tree, cur, cur + iosize - 1); - end_page_read(page, false, cur, iosize); + /* + * We have to unlock the remaining range, or the page + * will never be unlocked. + */ + unlock_extent(tree, cur, end); + end_page_read(page, false, cur, end + 1 - cur); goto out; } cur = cur + iosize; -- GitLab From 4475d3c3b224ce8c0790198a13a4e72ff60b1226 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 2 Mar 2022 10:02:56 +0530 Subject: [PATCH 0389/4335] iommu/vt-d: Add RPLS to quirk list to skip TE disabling [ Upstream commit 0a967f5bfd9134b89681cae58deb222e20840e76 ] The VT-d spec requires (10.4.4 Global Command Register, TE field) that: Hardware implementations supporting DMA draining must drain any in-flight DMA read/write requests queued within the Root-Complex before completing the translation enable command and reflecting the status of the command through the TES field in the Global Status register. Unfortunately, some integrated graphic devices fail to do so after some kind of power state transition. As the result, the system might stuck in iommu_disable_translati on(), waiting for the completion of TE transition. This adds RPLS to a quirk list for those devices and skips TE disabling if the qurik hits. Link: https://gitlab.freedesktop.org/drm/intel/-/issues/4898 Tested-by: Raviteja Goud Talla Cc: Rodrigo Vivi Acked-by: Lu Baolu Signed-off-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220302043256.191529-1-tejaskumarx.surendrakumar.upadhyay@intel.com Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 91a5c75966f32..a1ffb3d6d9015 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5728,7 +5728,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev) ver = (dev->device >> 8) & 0xff; if (ver != 0x45 && ver != 0x46 && ver != 0x4c && ver != 0x4e && ver != 0x8a && ver != 0x98 && - ver != 0x9a) + ver != 0x9a && ver != 0xa7) return; if (risky_device(dev)) -- GitLab From 079164db407c204065c4c87b7fc281ece180b056 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 18 Mar 2022 13:43:30 -0400 Subject: [PATCH 0390/4335] drm/vmwgfx: validate the screen formats [ Upstream commit 8bb75aeb58bd688d70827ae179bd3da57b6d975b ] The kms code wasn't validating the modifiers and was letting through unsupported formats. rgb8 was never properly supported and has no matching svga screen target format so remove it. This fixes format/modifier failures in kms_addfb_basic from IGT. Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20220318174332.440068-4-zack@kde.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 30 +++++++++++++++-------------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 1 - 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 50c64e7813be1..171e90c4b9f3f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -916,6 +916,15 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, * Sanity checks. */ + if (!drm_any_plane_has_format(&dev_priv->drm, + mode_cmd->pixel_format, + mode_cmd->modifier[0])) { + drm_dbg(&dev_priv->drm, + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); + return -EINVAL; + } + /* Surface must be marked as a scanout. */ if (unlikely(!surface->metadata.scanout)) return -EINVAL; @@ -1229,20 +1238,13 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, return -EINVAL; } - /* Limited framebuffer color depth support for screen objects */ - if (dev_priv->active_display_unit == vmw_du_screen_object) { - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - break; - case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_RGB565: - break; - default: - DRM_ERROR("Invalid pixel format: %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } + if (!drm_any_plane_has_format(&dev_priv->drm, + mode_cmd->pixel_format, + mode_cmd->modifier[0])) { + drm_dbg(&dev_priv->drm, + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); + return -EINVAL; } vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bbc809f7bd8a9..8c8ee87fd3ac7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -248,7 +248,6 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { DRM_FORMAT_XRGB1555, DRM_FORMAT_RGB565, - DRM_FORMAT_RGB888, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, }; -- GitLab From 32e10aabc287f09a148ff759bb9ce70b01b0012c Mon Sep 17 00:00:00 2001 From: Liu Zixian Date: Tue, 22 Mar 2022 17:17:30 +0800 Subject: [PATCH 0391/4335] drm/virtio: fix NULL pointer dereference in virtio_gpu_conn_get_modes [ Upstream commit 194d250cdc4a40ccbd179afd522a9e9846957402 ] drm_cvt_mode may return NULL and we should check it. This bug is found by syzkaller: FAULT_INJECTION stacktrace: [ 168.567394] FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 1 [ 168.567403] CPU: 1 PID: 6425 Comm: syz Kdump: loaded Not tainted 4.19.90-vhulk2201.1.0.h1035.kasan.eulerosv2r10.aarch64 #1 [ 168.567406] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 168.567408] Call trace: [ 168.567414] dump_backtrace+0x0/0x310 [ 168.567418] show_stack+0x28/0x38 [ 168.567423] dump_stack+0xec/0x15c [ 168.567427] should_fail+0x3ac/0x3d0 [ 168.567437] __should_failslab+0xb8/0x120 [ 168.567441] should_failslab+0x28/0xc0 [ 168.567445] kmem_cache_alloc_trace+0x50/0x640 [ 168.567454] drm_mode_create+0x40/0x90 [ 168.567458] drm_cvt_mode+0x48/0xc78 [ 168.567477] virtio_gpu_conn_get_modes+0xa8/0x140 [virtio_gpu] [ 168.567485] drm_helper_probe_single_connector_modes+0x3a4/0xd80 [ 168.567492] drm_mode_getconnector+0x2e0/0xa70 [ 168.567496] drm_ioctl_kernel+0x11c/0x1d8 [ 168.567514] drm_ioctl+0x558/0x6d0 [ 168.567522] do_vfs_ioctl+0x160/0xf30 [ 168.567525] ksys_ioctl+0x98/0xd8 [ 168.567530] __arm64_sys_ioctl+0x50/0xc8 [ 168.567536] el0_svc_common+0xc8/0x320 [ 168.567540] el0_svc_handler+0xf8/0x160 [ 168.567544] el0_svc+0x10/0x218 KASAN stacktrace: [ 168.567561] BUG: KASAN: null-ptr-deref in virtio_gpu_conn_get_modes+0xb4/0x140 [virtio_gpu] [ 168.567565] Read of size 4 at addr 0000000000000054 by task syz/6425 [ 168.567566] [ 168.567571] CPU: 1 PID: 6425 Comm: syz Kdump: loaded Not tainted 4.19.90-vhulk2201.1.0.h1035.kasan.eulerosv2r10.aarch64 #1 [ 168.567573] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 168.567575] Call trace: [ 168.567578] dump_backtrace+0x0/0x310 [ 168.567582] show_stack+0x28/0x38 [ 168.567586] dump_stack+0xec/0x15c [ 168.567591] kasan_report+0x244/0x2f0 [ 168.567594] __asan_load4+0x58/0xb0 [ 168.567607] virtio_gpu_conn_get_modes+0xb4/0x140 [virtio_gpu] [ 168.567612] drm_helper_probe_single_connector_modes+0x3a4/0xd80 [ 168.567617] drm_mode_getconnector+0x2e0/0xa70 [ 168.567621] drm_ioctl_kernel+0x11c/0x1d8 [ 168.567624] drm_ioctl+0x558/0x6d0 [ 168.567628] do_vfs_ioctl+0x160/0xf30 [ 168.567632] ksys_ioctl+0x98/0xd8 [ 168.567636] __arm64_sys_ioctl+0x50/0xc8 [ 168.567641] el0_svc_common+0xc8/0x320 [ 168.567645] el0_svc_handler+0xf8/0x160 [ 168.567649] el0_svc+0x10/0x218 Signed-off-by: Liu Zixian Link: http://patchwork.freedesktop.org/patch/msgid/20220322091730.1653-1-liuzixian4@huawei.com Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index a6caebd4a0dd6..ef1f19083cd31 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -179,6 +179,8 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector) DRM_DEBUG("add mode: %dx%d\n", width, height); mode = drm_cvt_mode(connector->dev, width, height, 60, false, false, false); + if (!mode) + return count; mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); count++; -- GitLab From 3117e7a0de6f82d154542b82a2df369e801508db Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 31 Mar 2022 17:09:49 +0300 Subject: [PATCH 0392/4335] selftests/bpf: Fix vfs_link kprobe definition [ Upstream commit e299bcd4d16ff86f46c48df1062c8aae0eca1ed8 ] Since commit 6521f8917082 ("namei: prepare for idmapped mounts") vfs_link's prototype was changed, the kprobe definition in profiler selftest in turn wasn't updated. The result is that all argument after the first are now stored in different registers. This means that self-test has been broken ever since. Fix it by updating the kprobe definition accordingly. Signed-off-by: Nikolay Borisov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220331140949.1410056-1-nborisov@suse.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/progs/profiler.inc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 4896fdf816f73..92331053dba3b 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -826,8 +826,9 @@ out: SEC("kprobe/vfs_link") int BPF_KPROBE(kprobe__vfs_link, - struct dentry* old_dentry, struct inode* dir, - struct dentry* new_dentry, struct inode** delegated_inode) + struct dentry* old_dentry, struct user_namespace *mnt_userns, + struct inode* dir, struct dentry* new_dentry, + struct inode** delegated_inode) { struct bpf_func_stats_ctx stats_ctx; bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); -- GitLab From 2485af5ca4bb32132137ac6b0fb5080e0be832f7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 4 Apr 2022 15:09:44 +0100 Subject: [PATCH 0393/4335] selftests/bpf: Fix parsing of prog types in UAPI hdr for bpftool sync [ Upstream commit 4eeebce6ac4ad80ee8243bb847c98e0e55848d47 ] The script for checking that various lists of types in bpftool remain in sync with the UAPI BPF header uses a regex to parse enum bpf_prog_type. If this enum contains a set of values different from the list of program types in bpftool, it complains. This script should have reported the addition, some time ago, of the new BPF_PROG_TYPE_SYSCALL, which was not reported to bpftool's program types list. It failed to do so, because it failed to parse that new type from the enum. This is because the new value, in the BPF header, has an explicative comment on the same line, and the regex does not support that. Let's update the script to support parsing enum values when they have comments on the same line. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220404140944.64744-1-quentin@isovalent.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_bpftool_synctypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index be54b7335a76e..5717db4e08621 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -172,7 +172,7 @@ class FileExtractor(object): @enum_name: name of the enum to parse """ start_marker = re.compile(f'enum {enum_name} {{\n') - pattern = re.compile('^\s*(BPF_\w+),?$') + pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') end_marker = re.compile('^};') parser = BlockParser(self.reader) parser.search_block(start_marker) -- GitLab From 07ea9293330d6598ae451b002c0f576941486be6 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 21 Mar 2022 23:55:16 +0100 Subject: [PATCH 0394/4335] mwifiex: add mutex lock for call in mwifiex_dfs_chan_sw_work_queue [ Upstream commit 3e12968f6d12a34b540c39cbd696a760cc4616f0 ] cfg80211_ch_switch_notify uses ASSERT_WDEV_LOCK to assert that net_device->ieee80211_ptr->mtx (which is the same as priv->wdev.mtx) is held during the function's execution. mwifiex_dfs_chan_sw_work_queue is one of its callers, which does not hold that lock, therefore violating the assertion. Add a lock around the call. Disclaimer: I am currently working on a static analyser to detect missing locks. This was a reported case. I manually verified the report by looking at the code, so that I do not send wrong information or patches. After concluding that this seems to be a true positive, I created this patch. However, as I do not in fact have this particular hardware, I was unable to test it. Reviewed-by: Brian Norris Signed-off-by: Niels Dossche Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220321225515.32113-1-dossche.niels@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/11h.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index d2ee6469e67bb..3fa25cd64cda0 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -303,5 +303,7 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); + mutex_lock(&priv->wdev.mtx); cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef); + mutex_unlock(&priv->wdev.mtx); } -- GitLab From e7823a60f339d5ce19c4768e2d3264a211f203f7 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Fri, 25 Mar 2022 18:17:13 +0800 Subject: [PATCH 0395/4335] b43legacy: Fix assigning negative value to unsigned variable [ Upstream commit 3f6b867559b3d43a7ce1b4799b755e812fc0d503 ] fix warning reported by smatch: drivers/net/wireless/broadcom/b43legacy/phy.c:1181 b43legacy_phy_lo_b_measure() warn: assigning (-772) to unsigned variable 'fval' Signed-off-by: Haowen Bai Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1648203433-8736-1-git-send-email-baihaowen@meizu.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43legacy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/phy.c b/drivers/net/wireless/broadcom/b43legacy/phy.c index 05404fbd1e70b..c1395e622759e 100644 --- a/drivers/net/wireless/broadcom/b43legacy/phy.c +++ b/drivers/net/wireless/broadcom/b43legacy/phy.c @@ -1123,7 +1123,7 @@ void b43legacy_phy_lo_b_measure(struct b43legacy_wldev *dev) struct b43legacy_phy *phy = &dev->phy; u16 regstack[12] = { 0 }; u16 mls; - u16 fval; + s16 fval; int i; int j; -- GitLab From 9547e5ed9c59bfc16675d831479d0b151f15e2d7 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Fri, 25 Mar 2022 18:15:15 +0800 Subject: [PATCH 0396/4335] b43: Fix assigning negative value to unsigned variable [ Upstream commit 11800d893b38e0e12d636c170c1abc19c43c730c ] fix warning reported by smatch: drivers/net/wireless/broadcom/b43/phy_n.c:585 b43_nphy_adjust_lna_gain_table() warn: assigning (-2) to unsigned variable '*(lna_gain[0])' Signed-off-by: Haowen Bai Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1648203315-28093-1-git-send-email-baihaowen@meizu.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/phy_n.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index cf3ccf4ddfe72..aa5c994656749 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -582,7 +582,7 @@ static void b43_nphy_adjust_lna_gain_table(struct b43_wldev *dev) u16 data[4]; s16 gain[2]; u16 minmax[2]; - static const u16 lna_gain[4] = { -2, 10, 19, 25 }; + static const s16 lna_gain[4] = { -2, 10, 19, 25 }; if (nphy->hang_avoid) b43_nphy_stay_in_carrier_search(dev, 1); -- GitLab From 48d4a820fd33f012e5f63735a59d15b5a3882882 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Fri, 1 Apr 2022 15:10:54 +0800 Subject: [PATCH 0397/4335] ipw2x00: Fix potential NULL dereference in libipw_xmit() [ Upstream commit e8366bbabe1d207cf7c5b11ae50e223ae6fc278b ] crypt and crypt->ops could be null, so we need to checking null before dereference Signed-off-by: Haowen Bai Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1648797055-25730-1-git-send-email-baihaowen@meizu.com Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/ipw2x00/libipw_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index 36d1e6b2568db..4aec1fce1ae29 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -383,7 +383,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev) /* Each fragment may need to have room for encryption * pre/postfix */ - if (host_encrypt) + if (host_encrypt && crypt && crypt->ops) bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + crypt->ops->extra_mpdu_postfix_len; -- GitLab From 7665af570b843f699eaf139609f070d910b7c0d6 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 4 Apr 2022 01:15:24 +0200 Subject: [PATCH 0398/4335] ipv6: fix locking issues with loops over idev->addr_list [ Upstream commit 51454ea42c1ab4e0c2828bb0d4d53957976980de ] idev->addr_list needs to be protected by idev->lock. However, it is not always possible to do so while iterating and performing actions on inet6_ifaddr instances. For example, multiple functions (like addrconf_{join,leave}_anycast) eventually call down to other functions that acquire the idev->lock. The current code temporarily unlocked the idev->lock during the loops, which can cause race conditions. Moving the locks up is also not an appropriate solution as the ordering of lock acquisition will be inconsistent with for example mc_lock. This solution adds an additional field to inet6_ifaddr that is used to temporarily add the instances to a temporary list while holding idev->lock. The temporary list can then be traversed without holding idev->lock. This change was done in two places. In addrconf_ifdown, the list_for_each_entry_safe variant of the list loop is also no longer necessary as there is no deletion within that specific loop. Suggested-by: Paolo Abeni Signed-off-by: Niels Dossche Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20220403231523.45843-1-dossche.niels@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/if_inet6.h | 8 ++++++++ net/ipv6/addrconf.c | 30 ++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 653e7d0f65cb7..8ec0878a90a7a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a8838b79bb69..1ba5ff21412cf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -798,6 +798,7 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); if (!idev) return; @@ -816,14 +817,24 @@ static void dev_forward_change(struct inet6_dev *idev) } } + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + } + read_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); @@ -3728,7 +3739,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; + struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); bool keep_addr = false; bool was_ready; int state, i; @@ -3820,16 +3832,23 @@ restart: write_lock_bh(&idev->lock); } - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_for_each_entry(ifa, &idev->addr_list, if_list) + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + write_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); + addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (keep) { @@ -3860,15 +3879,14 @@ restart: addrconf_leave_solict(ifa->idev, &ifa->addr); } - write_lock_bh(&idev->lock); if (!keep) { + write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); + write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } - write_unlock_bh(&idev->lock); - /* Step 5: Discard anycast and multicast list */ if (unregister) { ipv6_ac_destroy_dev(idev); -- GitLab From 0781b564048b0d2a4b966d8000498af7d754bad0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 5 Apr 2022 23:03:31 +0200 Subject: [PATCH 0399/4335] fbcon: Consistently protect deferred_takeover with console_lock() [ Upstream commit 43553559121ca90965b572cf8a1d6d0fd618b449 ] This shouldn't be a problem in practice since until we've actually taken over the console there's nothing we've registered with the console/vt subsystem, so the exit/unbind path that check this can't do the wrong thing. But it's confusing, so fix it by moving it a tad later. Acked-by: Sam Ravnborg Signed-off-by: Daniel Vetter Cc: Daniel Vetter Cc: Du Cheng Cc: Tetsuo Handa Cc: Claudio Suarez Cc: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220405210335.3434130-14-daniel.vetter@ffwll.ch Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index a53c1f6906f09..a25b63b56223f 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -3265,6 +3265,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work) console_lock(); + deferred_takeover = false; + logo_shown = FBCON_LOGO_DONTSHOW; + for_each_registered_fb(i) fbcon_fb_registered(registered_fb[i]); @@ -3282,8 +3285,6 @@ static int fbcon_output_notifier(struct notifier_block *nb, pr_info("fbcon: Taking over console\n"); dummycon_unregister_output_notifier(&fbcon_output_nb); - deferred_takeover = false; - logo_shown = FBCON_LOGO_DONTSHOW; /* We may get called in atomic context */ schedule_work(&fbcon_deferred_takeover_work); -- GitLab From 063ed7dbf9d3d44ba5fefe6ee638f3666988f9d4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 6 Apr 2022 14:51:48 -0500 Subject: [PATCH 0400/4335] x86/platform/uv: Update TSC sync state for UV5 [ Upstream commit bb3ab81bdbd53f88f26ffabc9fb15bd8466486ec ] The UV5 platform synchronizes the TSCs among all chassis, and will not proceed to OS boot without achieving synchronization. Previous UV platforms provided a register indicating successful synchronization. This is no longer available on UV5. On this platform TSC_ADJUST should not be reset by the kernel. Signed-off-by: Mike Travis Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Reviewed-by: Dimitri Sivanich Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220406195149.228164-3-steve.wahl@hpe.com Signed-off-by: Sasha Levin --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5a48e66e4f54..a6e9c2794ef56 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void) int mmr_shift; char *state; - /* Different returns from different UV BIOS versions */ + /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */ + if (!is_uv(UV2|UV3|UV4)) { + mark_tsc_async_resets("UV5+"); + return; + } + + /* UV2,3,4, UV BIOS TSC sync state available */ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); mmr_shift = is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; -- GitLab From a425d18a131fe89c61d6a07a441cd4c57b97c31f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 6 Apr 2022 02:29:38 +0300 Subject: [PATCH 0401/4335] ACPICA: Avoid cache flush inside virtual machines [ Upstream commit e2efb6359e620521d1e13f69b2257de8ceaa9475 ] While running inside virtual machine, the kernel can bypass cache flushing. Changing sleep state in a virtual machine doesn't affect the host system sleep state and cannot lead to data loss. Before entering sleep states, the ACPI code flushes caches to prevent data loss using the WBINVD instruction. This mechanism is required on bare metal. But, any use WBINVD inside of a guest is worthless. Changing sleep state in a virtual machine doesn't affect the host system sleep state and cannot lead to data loss, so most hypervisors simply ignore it. Despite this, the ACPI code calls WBINVD unconditionally anyway. It's useless, but also normally harmless. In TDX guests, though, WBINVD stops being harmless; it triggers a virtualization exception (#VE). If the ACPI cache-flushing WBINVD were left in place, TDX guests would need handling to recover from the exception. Avoid using WBINVD whenever running under a hypervisor. This both removes the useless WBINVDs and saves TDX from implementing WBINVD handling. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Dave Hansen Reviewed-by: Dan Williams Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20220405232939.73860-30-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/acenv.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 9aff97f0de7fd..d937c55e717e6 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -13,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); -- GitLab From 042f8d5a13d270e1cbda3cde410a5544b5468306 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Mon, 4 Apr 2022 18:54:14 +0200 Subject: [PATCH 0402/4335] mac80211: minstrel_ht: fix where rate stats are stored (fixes debugfs output) [ Upstream commit 5c6dd7bd569b54c0d2904125d7366aa93f077f67 ] Using an ath9k card the debugfs output of minstrel_ht looks like the following (note the zero values for the first four rates sum-of success/attempts): best ____________rate__________ ____statistics___ _____last____ ______sum-of________ mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts] OFDM 1 DP 6.0M 272 1640 5.2 3.1 53.8 3 0 0 0 0 OFDM 1 C 9.0M 273 1104 7.7 4.6 53.8 4 0 0 0 0 OFDM 1 B 12.0M 274 836 10.0 6.0 53.8 4 0 0 0 0 OFDM 1 A S 18.0M 275 568 14.3 8.5 53.8 5 0 0 0 0 OFDM 1 S 24.0M 276 436 18.1 0.0 0.0 5 0 1 80 1778 OFDM 1 36.0M 277 300 24.9 0.0 0.0 0 0 1 0 107 OFDM 1 S 48.0M 278 236 30.4 0.0 0.0 0 0 0 0 75 OFDM 1 54.0M 279 212 33.0 0.0 0.0 0 0 0 0 72 Total packet count:: ideal 16582 lookaround 885 Average # of aggregated frames per A-MPDU: 1.0 Debugging showed that the rate statistics for the first four rates where stored in the MINSTREL_CCK_GROUP instead of the MINSTREL_OFDM_GROUP because in minstrel_ht_get_stats() the supported check was not honoured as done in various other places, e.g net/mac80211/rc80211_minstrel_ht_debugfs.c: 74 if (!(mi->supported[i] & BIT(j))) 75 continue; With the patch applied the output looks good: best ____________rate__________ ____statistics___ _____last____ ______sum-of________ mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts] OFDM 1 D 6.0M 272 1640 5.2 5.2 100.0 3 0 0 1 1 OFDM 1 C 9.0M 273 1104 7.7 7.7 100.0 4 0 0 38 38 OFDM 1 B 12.0M 274 836 10.0 9.9 89.5 4 2 2 372 395 OFDM 1 A P 18.0M 275 568 14.3 14.3 97.2 5 52 53 6956 7181 OFDM 1 S 24.0M 276 436 18.1 0.0 0.0 0 0 1 6 163 OFDM 1 36.0M 277 300 24.9 0.0 0.0 0 0 1 0 35 OFDM 1 S 48.0M 278 236 30.4 0.0 0.0 0 0 0 0 38 OFDM 1 S 54.0M 279 212 33.0 0.0 0.0 0 0 0 0 38 Total packet count:: ideal 7097 lookaround 287 Average # of aggregated frames per A-MPDU: 1.0 Signed-off-by: Peter Seiderer Link: https://lore.kernel.org/r/20220404165414.1036-1-ps.report@gmx.net Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rc80211_minstrel_ht.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 72b44d4c42d0e..90238170dec35 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -364,6 +364,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { + if (!(mi->supported[group] & BIT(idx))) + continue; + if (rate->idx != mp->cck_rates[idx]) continue; -- GitLab From 3f2dc8106345e34bbf425d00dfb2ce708048b7e2 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 2 Dec 2021 17:00:33 +0000 Subject: [PATCH 0403/4335] drm/komeda: return early if drm_universal_plane_init() fails. [ Upstream commit c8f76c37cc3668ee45e081e76a15f24a352ebbdd ] If drm_universal_plane_init() fails early we jump to the common cleanup code that calls komeda_plane_destroy() which in turn could access the uninitalised drm_plane and crash. Return early if an error is detected without going through the common code. Reported-by: Steven Price Reviewed-by: Steven Price Signed-off-by: Liviu Dudau Link: https://lore.kernel.org/dri-devel/20211203100946.2706922-1-liviu.dudau@arm.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/arm/display/komeda/komeda_plane.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c index d63d83800a8a3..d646e3ae1a23c 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -275,8 +275,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, komeda_put_fourcc_list(formats); - if (err) - goto cleanup; + if (err) { + kfree(kplane); + return err; + } drm_plane_helper_add(plane, &komeda_plane_helper_funcs); -- GitLab From d0d266c2f689d6113fe0d43d1b1fc105157c7443 Mon Sep 17 00:00:00 2001 From: Saaem Rizvi Date: Mon, 28 Mar 2022 14:25:16 -0400 Subject: [PATCH 0404/4335] drm/amd/display: Disabling Z10 on DCN31 [ Upstream commit 5d5af34072c8b11f60960c3bea57ff9de5877791 ] [WHY] Z10 is should not be enabled by default on DCN31. [HOW] Using DC debug flags to disable Z10 by default on DCN31. Reviewed-by: Eric Yang Acked-by: Pavle Kotarac Signed-off-by: Saaem Rizvi Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index a5ef9d5e7685f..310ced5058c4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -957,6 +957,7 @@ static const struct dc_debug_options debug_defaults_drv = { .optc = false, } }, + .disable_z10 = true, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, }; -- GitLab From ba722d061bc4b54802d701fc63fc2fd988934603 Mon Sep 17 00:00:00 2001 From: Padmanabha Srinivasaiah Date: Thu, 17 Feb 2022 16:25:19 +0100 Subject: [PATCH 0405/4335] rcu-tasks: Fix race in schedule and flush work [ Upstream commit f75fd4b9221d93177c50dcfde671b2e907f53e86 ] While booting secondary CPUs, cpus_read_[lock/unlock] is not keeping online cpumask stable. The transient online mask results in below calltrace. [ 0.324121] CPU1: Booted secondary processor 0x0000000001 [0x410fd083] [ 0.346652] Detected PIPT I-cache on CPU2 [ 0.347212] CPU2: Booted secondary processor 0x0000000002 [0x410fd083] [ 0.377255] Detected PIPT I-cache on CPU3 [ 0.377823] CPU3: Booted secondary processor 0x0000000003 [0x410fd083] [ 0.379040] ------------[ cut here ]------------ [ 0.383662] WARNING: CPU: 0 PID: 10 at kernel/workqueue.c:3084 __flush_work+0x12c/0x138 [ 0.384850] Modules linked in: [ 0.385403] CPU: 0 PID: 10 Comm: rcu_tasks_rude_ Not tainted 5.17.0-rc3-v8+ #13 [ 0.386473] Hardware name: Raspberry Pi 4 Model B Rev 1.4 (DT) [ 0.387289] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 0.388308] pc : __flush_work+0x12c/0x138 [ 0.388970] lr : __flush_work+0x80/0x138 [ 0.389620] sp : ffffffc00aaf3c60 [ 0.390139] x29: ffffffc00aaf3d20 x28: ffffffc009c16af0 x27: ffffff80f761df48 [ 0.391316] x26: 0000000000000004 x25: 0000000000000003 x24: 0000000000000100 [ 0.392493] x23: ffffffffffffffff x22: ffffffc009c16b10 x21: ffffffc009c16b28 [ 0.393668] x20: ffffffc009e53861 x19: ffffff80f77fbf40 x18: 00000000d744fcc9 [ 0.394842] x17: 000000000000000b x16: 00000000000001c2 x15: ffffffc009e57550 [ 0.396016] x14: 0000000000000000 x13: ffffffffffffffff x12: 0000000100000000 [ 0.397190] x11: 0000000000000462 x10: ffffff8040258008 x9 : 0000000100000000 [ 0.398364] x8 : 0000000000000000 x7 : ffffffc0093c8bf4 x6 : 0000000000000000 [ 0.399538] x5 : 0000000000000000 x4 : ffffffc00a976e40 x3 : ffffffc00810444c [ 0.400711] x2 : 0000000000000004 x1 : 0000000000000000 x0 : 0000000000000000 [ 0.401886] Call trace: [ 0.402309] __flush_work+0x12c/0x138 [ 0.402941] schedule_on_each_cpu+0x228/0x278 [ 0.403693] rcu_tasks_rude_wait_gp+0x130/0x144 [ 0.404502] rcu_tasks_kthread+0x220/0x254 [ 0.405264] kthread+0x174/0x1ac [ 0.405837] ret_from_fork+0x10/0x20 [ 0.406456] irq event stamp: 102 [ 0.406966] hardirqs last enabled at (101): [] _raw_spin_unlock_irq+0x78/0xb4 [ 0.408304] hardirqs last disabled at (102): [] el1_dbg+0x24/0x5c [ 0.409410] softirqs last enabled at (54): [] local_bh_enable+0xc/0x2c [ 0.410645] softirqs last disabled at (50): [] local_bh_disable+0xc/0x2c [ 0.411890] ---[ end trace 0000000000000000 ]--- [ 0.413000] smp: Brought up 1 node, 4 CPUs [ 0.413762] SMP: Total of 4 processors activated. [ 0.414566] CPU features: detected: 32-bit EL0 Support [ 0.415414] CPU features: detected: 32-bit EL1 Support [ 0.416278] CPU features: detected: CRC32 instructions [ 0.447021] Callback from call_rcu_tasks_rude() invoked. [ 0.506693] Callback from call_rcu_tasks() invoked. This commit therefore fixes this issue by applying a single-CPU optimization to the RCU Tasks Rude grace-period process. The key point here is that the purpose of this RCU flavor is to force a schedule on each online CPU since some past event. But the rcu_tasks_rude_wait_gp() function runs in the context of the RCU Tasks Rude's grace-period kthread, so there must already have been a context switch on the current CPU since the call to either synchronize_rcu_tasks_rude() or call_rcu_tasks_rude(). So if there is only a single CPU online, RCU Tasks Rude's grace-period kthread does not need to anything at all. It turns out that the rcu_tasks_rude_wait_gp() function's call to schedule_on_each_cpu() causes problems during early boot. During that time, there is only one online CPU, namely the boot CPU. Therefore, applying this single-CPU optimization fixes early-boot instances of this problem. Link: https://lore.kernel.org/lkml/20220210184319.25009-1-treasure4paddy@gmail.com/T/ Suggested-by: Paul E. McKenney Signed-off-by: Padmanabha Srinivasaiah Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- kernel/rcu/tasks.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 6591914af4864..60c9eacac25bb 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -660,6 +660,9 @@ static void rcu_tasks_be_rude(struct work_struct *work) // Wait for one rude RCU-tasks grace period. static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp) { + if (num_online_cpus() <= 1) + return; // Fastpath for only one CPU. + rtp->n_ipis += cpumask_weight(cpu_online_mask); schedule_on_each_cpu(rcu_tasks_be_rude); } -- GitLab From 2da36b14af94dc736b24941d13217493c117a2c5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Mar 2022 09:30:10 -0700 Subject: [PATCH 0406/4335] rcu: Make TASKS_RUDE_RCU select IRQ_WORK [ Upstream commit 46e861be589881e0905b9ade3d8439883858721c ] The TASKS_RUDE_RCU does not select IRQ_WORK, which can result in build failures for kernels that do not otherwise select IRQ_WORK. This commit therefore causes the TASKS_RUDE_RCU Kconfig option to select IRQ_WORK. Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- kernel/rcu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 3128b7cf8e1fd..f73cf17fcee92 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -86,6 +86,7 @@ config TASKS_RCU config TASKS_RUDE_RCU def_bool 0 + select IRQ_WORK help This option enables a task-based RCU implementation that uses only context switch (including preemption) and user-mode -- GitLab From 64e9f4f65c945b8eb6f4b2a7d0312150b4b8d7d8 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Mon, 11 Apr 2022 09:32:37 +0800 Subject: [PATCH 0407/4335] sfc: ef10: Fix assigning negative value to unsigned variable [ Upstream commit b8ff3395fbdf3b79a99d0ef410fc34c51044121e ] fix warning reported by smatch: 251 drivers/net/ethernet/sfc/ef10.c:2259 efx_ef10_tx_tso_desc() warn: assigning (-208) to unsigned variable 'ip_tot_len' Signed-off-by: Haowen Bai Acked-by: Edward Cree Link: https://lore.kernel.org/r/1649640757-30041-1-git-send-email-baihaowen@meizu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index f5a4d8f4fd114..c1cd1c97f09dd 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2256,7 +2256,7 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, * guaranteed to satisfy the second as we only attempt TSO if * inner_network_header <= 208. */ - ip_tot_len = -EFX_TSO2_MAX_HDRLEN; + ip_tot_len = 0x10000 - EFX_TSO2_MAX_HDRLEN; EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN + (tcp->doff << 2u) > ip_tot_len); -- GitLab From 5cc6f623f4818c7d7e9e966a45ebf324901ca9c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Tue, 12 Apr 2022 11:16:28 +0200 Subject: [PATCH 0408/4335] ALSA: jack: Access input_dev under mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1b6a6fc5280e97559287b61eade2d4b363e836f2 ] It is possible when using ASoC that input_dev is unregistered while calling snd_jack_report, which causes NULL pointer dereference. In order to prevent this serialize access to input_dev using mutex lock. Signed-off-by: Amadeusz Sławiński Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220412091628.3056922-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/jack.h | 1 + sound/core/jack.c | 34 +++++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/sound/jack.h b/include/sound/jack.h index 1181f536557eb..1ed90e2109e9b 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -62,6 +62,7 @@ struct snd_jack { const char *id; #ifdef CONFIG_SND_JACK_INPUT_DEV struct input_dev *input_dev; + struct mutex input_dev_lock; int registered; int type; char name[100]; diff --git a/sound/core/jack.c b/sound/core/jack.c index d1e3055f2b6a5..88493cc31914b 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -42,8 +42,11 @@ static int snd_jack_dev_disconnect(struct snd_device *device) #ifdef CONFIG_SND_JACK_INPUT_DEV struct snd_jack *jack = device->device_data; - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return 0; + } /* If the input device is registered with the input subsystem * then we need to use a different deallocator. */ @@ -52,6 +55,7 @@ static int snd_jack_dev_disconnect(struct snd_device *device) else input_free_device(jack->input_dev); jack->input_dev = NULL; + mutex_unlock(&jack->input_dev_lock); #endif /* CONFIG_SND_JACK_INPUT_DEV */ return 0; } @@ -90,8 +94,11 @@ static int snd_jack_dev_register(struct snd_device *device) snprintf(jack->name, sizeof(jack->name), "%s %s", card->shortname, jack->id); - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return 0; + } jack->input_dev->name = jack->name; @@ -116,6 +123,7 @@ static int snd_jack_dev_register(struct snd_device *device) if (err == 0) jack->registered = 1; + mutex_unlock(&jack->input_dev_lock); return err; } #endif /* CONFIG_SND_JACK_INPUT_DEV */ @@ -517,9 +525,11 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; } - /* don't creat input device for phantom jack */ - if (!phantom_jack) { #ifdef CONFIG_SND_JACK_INPUT_DEV + mutex_init(&jack->input_dev_lock); + + /* don't create input device for phantom jack */ + if (!phantom_jack) { int i; jack->input_dev = input_allocate_device(); @@ -537,8 +547,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, input_set_capability(jack->input_dev, EV_SW, jack_switch_types[i]); -#endif /* CONFIG_SND_JACK_INPUT_DEV */ } +#endif /* CONFIG_SND_JACK_INPUT_DEV */ err = snd_device_new(card, SNDRV_DEV_JACK, jack, &ops); if (err < 0) @@ -578,10 +588,14 @@ EXPORT_SYMBOL(snd_jack_new); void snd_jack_set_parent(struct snd_jack *jack, struct device *parent) { WARN_ON(jack->registered); - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return; + } jack->input_dev->dev.parent = parent; + mutex_unlock(&jack->input_dev_lock); } EXPORT_SYMBOL(snd_jack_set_parent); @@ -629,6 +643,8 @@ EXPORT_SYMBOL(snd_jack_set_key); /** * snd_jack_report - Report the current status of a jack + * Note: This function uses mutexes and should be called from a + * context which can sleep (such as a workqueue). * * @jack: The jack to report status for * @status: The current status of the jack @@ -654,8 +670,11 @@ void snd_jack_report(struct snd_jack *jack, int status) status & jack_kctl->mask_bits); #ifdef CONFIG_SND_JACK_INPUT_DEV - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return; + } for (i = 0; i < ARRAY_SIZE(jack->key); i++) { int testbit = ((SND_JACK_BTN_0 >> i) & ~mask_bits); @@ -675,6 +694,7 @@ void snd_jack_report(struct snd_jack *jack, int status) } input_sync(jack->input_dev); + mutex_unlock(&jack->input_dev_lock); #endif /* CONFIG_SND_JACK_INPUT_DEV */ } EXPORT_SYMBOL(snd_jack_report); -- GitLab From ecfe184509a519cd165fad6228b8fe59d8cbd9e4 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Thu, 7 Apr 2022 17:58:58 +0800 Subject: [PATCH 0409/4335] rtw88: 8821c: fix debugfs rssi value [ Upstream commit ece31c93d4d68f7eb8eea4431b052aacdb678de2 ] RSSI value per frame is reported to mac80211 but not maintained in our own statistics, add it back to help us debug. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220407095858.46807-7-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 785b8181513f1..f405f42d1c1b0 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -506,6 +506,7 @@ static s8 get_cck_rx_pwr(struct rtw_dev *rtwdev, u8 lna_idx, u8 vga_idx) static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status, struct rtw_rx_pkt_stat *pkt_stat) { + struct rtw_dm_info *dm_info = &rtwdev->dm_info; s8 rx_power; u8 lna_idx = 0; u8 vga_idx = 0; @@ -517,6 +518,7 @@ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status, pkt_stat->rx_power[RF_PATH_A] = rx_power; pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1); + dm_info->rssi[RF_PATH_A] = pkt_stat->rssi; pkt_stat->bw = RTW_CHANNEL_WIDTH_20; pkt_stat->signal_power = rx_power; } @@ -524,6 +526,7 @@ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status, static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status, struct rtw_rx_pkt_stat *pkt_stat) { + struct rtw_dm_info *dm_info = &rtwdev->dm_info; u8 rxsc, bw; s8 min_rx_power = -120; @@ -543,6 +546,7 @@ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status, pkt_stat->rx_power[RF_PATH_A] = GET_PHY_STAT_P1_PWDB_A(phy_status) - 110; pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1); + dm_info->rssi[RF_PATH_A] = pkt_stat->rssi; pkt_stat->bw = bw; pkt_stat->signal_power = max(pkt_stat->rx_power[RF_PATH_A], min_rx_power); -- GitLab From 30be187da5795e222d1b2a083b9b3604068a7dcc Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 11 Apr 2022 18:31:15 +0100 Subject: [PATCH 0410/4335] spi: spi-rspi: Remove setting {src,dst}_{addr,addr_width} based on DMA direction [ Upstream commit 6f381481a5b236cb53d6de2c49c6ef83a4d0f432 ] The direction field in the DMA config is deprecated. The rspi driver sets {src,dst}_{addr,addr_width} based on the DMA direction and it results in dmaengine_slave_config() failure as RZ DMAC driver validates {src,dst}_addr_width values independent of DMA direction. This patch fixes the issue by passing both {src,dst}_{addr,addr_width} values independent of DMA direction. Signed-off-by: Biju Das Suggested-by: Vinod Koul Reviewed-by: Vinod Koul Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220411173115.6619-1-biju.das.jz@bp.renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-rspi.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index d16ed88802d36..d575c935e9f09 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1107,14 +1107,11 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev, } memset(&cfg, 0, sizeof(cfg)); + cfg.dst_addr = port_addr + RSPI_SPDR; + cfg.src_addr = port_addr + RSPI_SPDR; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; cfg.direction = dir; - if (dir == DMA_MEM_TO_DEV) { - cfg.dst_addr = port_addr; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } else { - cfg.src_addr = port_addr; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } ret = dmaengine_slave_config(chan, &cfg); if (ret) { @@ -1145,12 +1142,12 @@ static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr, } ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_tx) return -ENODEV; ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_rx) { dma_release_channel(ctlr->dma_tx); ctlr->dma_tx = NULL; -- GitLab From fcd1893ef57b88c1656d92e0be7a4f1f23f461d2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 10 Feb 2022 21:06:56 -0500 Subject: [PATCH 0411/4335] tools/power turbostat: fix ICX DRAM power numbers [ Upstream commit 6397b6418935773a34b533b3348b03f4ce3d7050 ] ICX (and its duplicates) require special hard-coded DRAM RAPL units, rather than using the generic RAPL energy units. Reported-by: Srinivas Pandruvada Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 47d3ba895d6d9..4f176bbf29f42 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4376,6 +4376,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); -- GitLab From 271725e4028559ae7974d762a8467dc9de412f2e Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 12 Apr 2022 15:19:44 -0700 Subject: [PATCH 0412/4335] scsi: lpfc: Move cfg_log_verbose check before calling lpfc_dmp_dbg() [ Upstream commit e294647b1aed4247fe52851f3a3b2b19ae906228 ] In an attempt to log message 0126 with LOG_TRACE_EVENT, the following hard lockup call trace hangs the system. Call Trace: _raw_spin_lock_irqsave+0x32/0x40 lpfc_dmp_dbg.part.32+0x28/0x220 [lpfc] lpfc_cmpl_els_fdisc+0x145/0x460 [lpfc] lpfc_sli_cancel_jobs+0x92/0xd0 [lpfc] lpfc_els_flush_cmd+0x43c/0x670 [lpfc] lpfc_els_flush_all_cmd+0x37/0x60 [lpfc] lpfc_sli4_async_event_proc+0x956/0x1720 [lpfc] lpfc_do_work+0x1485/0x1d70 [lpfc] kthread+0x112/0x130 ret_from_fork+0x1f/0x40 Kernel panic - not syncing: Hard LOCKUP The same CPU tries to claim the phba->port_list_lock twice. Move the cfg_log_verbose checks as part of the lpfc_printf_vlog() and lpfc_printf_log() macros before calling lpfc_dmp_dbg(). There is no need to take the phba->port_list_lock within lpfc_dmp_dbg(). Link: https://lore.kernel.org/r/20220412222008.126521-3-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_init.c | 29 +---------------------------- drivers/scsi/lpfc/lpfc_logmsg.h | 6 +++--- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 16246526e4c1e..ce103c1f80629 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -15564,34 +15564,7 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba) unsigned int temp_idx; int i; int j = 0; - unsigned long rem_nsec, iflags; - bool log_verbose = false; - struct lpfc_vport *port_iterator; - - /* Don't dump messages if we explicitly set log_verbose for the - * physical port or any vport. - */ - if (phba->cfg_log_verbose) - return; - - spin_lock_irqsave(&phba->port_list_lock, iflags); - list_for_each_entry(port_iterator, &phba->port_list, listentry) { - if (port_iterator->load_flag & FC_UNLOADING) - continue; - if (scsi_host_get(lpfc_shost_from_vport(port_iterator))) { - if (port_iterator->cfg_log_verbose) - log_verbose = true; - - scsi_host_put(lpfc_shost_from_vport(port_iterator)); - - if (log_verbose) { - spin_unlock_irqrestore(&phba->port_list_lock, - iflags); - return; - } - } - } - spin_unlock_irqrestore(&phba->port_list_lock, iflags); + unsigned long rem_nsec; if (atomic_cmpxchg(&phba->dbg_log_dmping, 0, 1) != 0) return; diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index 7d480c7987942..a5aafe230c74f 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -73,7 +73,7 @@ do { \ #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \ do { \ { if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '3')) { \ - if ((mask) & LOG_TRACE_EVENT) \ + if ((mask) & LOG_TRACE_EVENT && !(vport)->cfg_log_verbose) \ lpfc_dmp_dbg((vport)->phba); \ dev_printk(level, &((vport)->phba->pcidev)->dev, "%d:(%d):" \ fmt, (vport)->phba->brd_no, vport->vpi, ##arg); \ @@ -89,11 +89,11 @@ do { \ (phba)->pport->cfg_log_verbose : \ (phba)->cfg_log_verbose; \ if (((mask) & log_verbose) || (level[1] <= '3')) { \ - if ((mask) & LOG_TRACE_EVENT) \ + if ((mask) & LOG_TRACE_EVENT && !log_verbose) \ lpfc_dmp_dbg(phba); \ dev_printk(level, &((phba)->pcidev)->dev, "%d:" \ fmt, phba->brd_no, ##arg); \ - } else if (!(phba)->cfg_log_verbose)\ + } else if (!log_verbose)\ lpfc_dbg_print(phba, "%d:" fmt, phba->brd_no, ##arg); \ } \ } while (0) -- GitLab From 7625e81de2164a082810e1f27547d388406da610 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 12 Apr 2022 15:19:48 -0700 Subject: [PATCH 0413/4335] scsi: lpfc: Fix SCSI I/O completion and abort handler deadlock [ Upstream commit 03cbbd7c2f5ee288f648f4aeedc765a181188553 ] During stress I/O tests with 500+ vports, hard LOCKUP call traces are observed. CPU A: native_queued_spin_lock_slowpath+0x192 _raw_spin_lock_irqsave+0x32 lpfc_handle_fcp_err+0x4c6 lpfc_fcp_io_cmd_wqe_cmpl+0x964 lpfc_sli4_fp_handle_cqe+0x266 __lpfc_sli4_process_cq+0x105 __lpfc_sli4_hba_process_cq+0x3c lpfc_cq_poll_hdler+0x16 irq_poll_softirq+0x76 __softirqentry_text_start+0xe4 irq_exit+0xf7 do_IRQ+0x7f CPU B: native_queued_spin_lock_slowpath+0x5b _raw_spin_lock+0x1c lpfc_abort_handler+0x13e scmd_eh_abort_handler+0x85 process_one_work+0x1a7 worker_thread+0x30 kthread+0x112 ret_from_fork+0x1f Diagram of lockup: CPUA CPUB ---- ---- lpfc_cmd->buf_lock phba->hbalock lpfc_cmd->buf_lock phba->hbalock Fix by reordering the taking of the lpfc_cmd->buf_lock and phba->hbalock in lpfc_abort_handler routine so that it tries to take the lpfc_cmd->buf_lock first before phba->hbalock. Link: https://lore.kernel.org/r/20220412222008.126521-7-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 3d9175f1b6787..8c79264a935b4 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5885,25 +5885,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (!lpfc_cmd) return ret; - spin_lock_irqsave(&phba->hbalock, flags); + /* Guard against IO completion being called at same time */ + spin_lock_irqsave(&lpfc_cmd->buf_lock, flags); + + spin_lock(&phba->hbalock); /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_IOQ_FLUSH) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, "3168 SCSI Layer abort requested I/O has been " "flushed by LLD.\n"); ret = FAILED; - goto out_unlock; + goto out_unlock_hba; } - /* Guard against IO completion being called at same time */ - spin_lock(&lpfc_cmd->buf_lock); - if (!lpfc_cmd->pCmd) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, "2873 SCSI Layer I/O Abort Request IO CMPL Status " "x%x ID %d LUN %llu\n", SUCCESS, cmnd->device->id, cmnd->device->lun); - goto out_unlock_buf; + goto out_unlock_hba; } iocb = &lpfc_cmd->cur_iocbq; @@ -5911,7 +5911,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; if (!pring_s4) { ret = FAILED; - goto out_unlock_buf; + goto out_unlock_hba; } spin_lock(&pring_s4->ring_lock); } @@ -5944,8 +5944,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) "3389 SCSI Layer I/O Abort Request is pending\n"); if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring_s4->ring_lock); - spin_unlock(&lpfc_cmd->buf_lock); - spin_unlock_irqrestore(&phba->hbalock, flags); + spin_unlock(&phba->hbalock); + spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags); goto wait_for_cmpl; } @@ -5966,15 +5966,13 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (ret_val != IOCB_SUCCESS) { /* Indicate the IO is not being aborted by the driver. */ lpfc_cmd->waitq = NULL; - spin_unlock(&lpfc_cmd->buf_lock); - spin_unlock_irqrestore(&phba->hbalock, flags); ret = FAILED; - goto out; + goto out_unlock_hba; } /* no longer need the lock after this point */ - spin_unlock(&lpfc_cmd->buf_lock); - spin_unlock_irqrestore(&phba->hbalock, flags); + spin_unlock(&phba->hbalock); + spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, @@ -6009,10 +6007,9 @@ wait_for_cmpl: out_unlock_ring: if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring_s4->ring_lock); -out_unlock_buf: - spin_unlock(&lpfc_cmd->buf_lock); -out_unlock: - spin_unlock_irqrestore(&phba->hbalock, flags); +out_unlock_hba: + spin_unlock(&phba->hbalock); + spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags); out: lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, "0749 SCSI Layer I/O Abort Request Status x%x ID %d " -- GitLab From ae373d66c427812754db5292eb1481b181daf9ce Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 12 Apr 2022 15:19:57 -0700 Subject: [PATCH 0414/4335] scsi: lpfc: Fix call trace observed during I/O with CMF enabled [ Upstream commit d6d45f67a11136cb88a70a29ab22ea6db8ae6bd5 ] The following was seen with CMF enabled: BUG: using smp_processor_id() in preemptible code: systemd-udevd/31711 kernel: caller is lpfc_update_cmf_cmd+0x214/0x420 [lpfc] kernel: CPU: 12 PID: 31711 Comm: systemd-udevd kernel: Call Trace: kernel: kernel: dump_stack_lvl+0x44/0x57 kernel: check_preemption_disabled+0xbf/0xe0 kernel: lpfc_update_cmf_cmd+0x214/0x420 [lpfc] kernel: lpfc_nvme_fcp_io_submit+0x23b4/0x4df0 [lpfc] this_cpu_ptr() calls smp_processor_id() in a preemptible context. Fix by using per_cpu_ptr() with raw_smp_processor_id() instead. Link: https://lore.kernel.org/r/20220412222008.126521-16-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 8c79264a935b4..c6944b282e217 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3917,7 +3917,7 @@ lpfc_update_cmf_cmpl(struct lpfc_hba *phba, else time = div_u64(time + 500, 1000); /* round it */ - cgs = this_cpu_ptr(phba->cmf_stat); + cgs = per_cpu_ptr(phba->cmf_stat, raw_smp_processor_id()); atomic64_add(size, &cgs->rcv_bytes); atomic64_add(time, &cgs->rx_latency); atomic_inc(&cgs->rx_io_cnt); @@ -3960,7 +3960,7 @@ lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t size) atomic_set(&phba->rx_max_read_cnt, size); } - cgs = this_cpu_ptr(phba->cmf_stat); + cgs = per_cpu_ptr(phba->cmf_stat, raw_smp_processor_id()); atomic64_add(size, &cgs->total_bytes); return 0; } -- GitLab From 2bcec28ac0b39834ee4a9e13b38ea1c7ca12ca12 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 1 Apr 2022 16:11:24 +0200 Subject: [PATCH 0415/4335] cpuidle: PSCI: Improve support for suspend-to-RAM for PSCI OSI mode [ Upstream commit 171b66e2e2e9d80b93c8cff799e6175074b22297 ] When PSCI OSI mode is supported the syscore flag is set for the CPU devices that becomes attached to their PM domains (genpds). In the suspend-to-idle case, we call dev_pm_genpd_suspend|resume() to allow genpd to properly manage the power-off/on operations (pick an idlestate and manage the on/off notifications). For suspend-to-ram, dev_pm_genpd_suspend|resume() is currently not being called, which causes a problem that the genpd on/off notifiers do not get sent as expected. This prevents the platform-specific operations from being executed, typically needed just before/after the boot CPU is being turned off/on. To deal with this problem, let's register a syscore ops for cpuidle-psci when PSCI OSI mode is being used and call dev_pm_genpd_suspend|resume() from them. In this way, genpd regains control of the PM domain topology and then sends the on/off notifications when it's appropriate. Reported-by: Maulik Shah Suggested-by: Maulik Shah Signed-off-by: Ulf Hansson Tested-by: Maulik Shah Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpuidle/cpuidle-psci.c | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index b51b5df084500..540105ca0781f 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu) return 0; } +static void psci_idle_syscore_switch(bool suspend) +{ + bool cleared = false; + struct device *dev; + int cpu; + + for_each_possible_cpu(cpu) { + dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev; + + if (dev && suspend) { + dev_pm_genpd_suspend(dev); + } else if (dev) { + dev_pm_genpd_resume(dev); + + /* Account for userspace having offlined a CPU. */ + if (pm_runtime_status_suspended(dev)) + pm_runtime_set_active(dev); + + /* Clear domain state to re-start fresh. */ + if (!cleared) { + psci_set_domain_state(0); + cleared = true; + } + } + } +} + +static int psci_idle_syscore_suspend(void) +{ + psci_idle_syscore_switch(true); + return 0; +} + +static void psci_idle_syscore_resume(void) +{ + psci_idle_syscore_switch(false); +} + +static struct syscore_ops psci_idle_syscore_ops = { + .suspend = psci_idle_syscore_suspend, + .resume = psci_idle_syscore_resume, +}; + static void psci_idle_init_cpuhp(void) { int err; @@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void) if (!psci_cpuidle_use_cpuhp) return; + register_syscore_ops(&psci_idle_syscore_ops); + err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, -- GitLab From a5ce7051db044290b1a95045ff03c249005a3aa4 Mon Sep 17 00:00:00 2001 From: Keita Suzuki Date: Tue, 19 Apr 2022 10:37:19 +0000 Subject: [PATCH 0416/4335] drm/amd/pm: fix double free in si_parse_power_table() [ Upstream commit f3fa2becf2fc25b6ac7cf8d8b1a2e4a86b3b72bd ] In function si_parse_power_table(), array adev->pm.dpm.ps and its member is allocated. If the allocation of each member fails, the array itself is freed and returned with an error code. However, the array is later freed again in si_dpm_fini() function which is called when the function returns an error. This leads to potential double free of the array adev->pm.dpm.ps, as well as leak of its array members, since the members are not freed in the allocation function and the array is not nulled when freed. In addition adev->pm.dpm.num_ps, which keeps track of the allocated array member, is not updated until the member allocation is successfully finished, this could also lead to either use after free, or uninitialized variable access in si_dpm_fini(). Fix this by postponing the free of the array until si_dpm_fini() and increment adev->pm.dpm.num_ps everytime the array member is allocated. Signed-off-by: Keita Suzuki Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/powerplay/si_dpm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c index 81f82aa05ec28..66fc63f1f1c17 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c @@ -7247,17 +7247,15 @@ static int si_parse_power_table(struct amdgpu_device *adev) if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { + for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], non_clock_info, @@ -7279,8 +7277,8 @@ static int si_parse_power_table(struct amdgpu_device *adev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + adev->pm.dpm.num_ps++; } - adev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { -- GitLab From 24632913453f5b74d37fd87823d7f7b4919a2854 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Apr 2022 02:55:27 +0000 Subject: [PATCH 0417/4335] ASoC: rsnd: care default case on rsnd_ssiu_busif_err_status_clear() [ Upstream commit b1384d4c95088d01f4266237faabf165d3d605fc ] commit cfb7b8bf1e2d66 ("ASoC: rsnd: tidyup rsnd_ssiu_busif_err_status_clear()") merged duplicate code, but it didn't care about default case, and causes smatch warnings. smatch warnings: sound/soc/sh/rcar/ssiu.c:112 rsnd_ssiu_busif_err_status_clear() \ error: uninitialized symbol 'offset'. sound/soc/sh/rcar/ssiu.c:114 rsnd_ssiu_busif_err_status_clear() \ error: uninitialized symbol 'shift'. This patch cares it. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87r15rgn6p.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ssiu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 0d8f97633dd26..138f95dd9f4af 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -102,6 +102,8 @@ bool rsnd_ssiu_busif_err_status_clear(struct rsnd_mod *mod) shift = 1; offset = 1; break; + default: + goto out; } for (i = 0; i < 4; i++) { @@ -120,7 +122,7 @@ bool rsnd_ssiu_busif_err_status_clear(struct rsnd_mod *mod) } rsnd_mod_write(mod, reg, val); } - +out: return error; } -- GitLab From 4374b8d71ce83a2dea404368073fea676b80c3d6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Apr 2022 02:55:58 +0000 Subject: [PATCH 0418/4335] ASoC: rsnd: care return value from rsnd_node_fixed_index() [ Upstream commit d09a7db431c65aaa8303eb456439d1831ca2e6b4 ] Renesas Sound is very complex, and thus it needs to use rsnd_node_fixed_index() to know enabled pin index. It returns error if strange pin was selected, but some codes didn't check it. This patch 1) indicates error message, 2) check return value. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87pmlbgn5t.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/core.c | 15 ++++++++++----- sound/soc/sh/rcar/dma.c | 9 ++++++++- sound/soc/sh/rcar/rsnd.h | 2 +- sound/soc/sh/rcar/src.c | 7 ++++++- sound/soc/sh/rcar/ssi.c | 14 ++++++++++++-- sound/soc/sh/rcar/ssiu.c | 7 ++++++- 6 files changed, 43 insertions(+), 11 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 6a8fe0da7670b..af8ef2a27d341 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1159,6 +1159,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name, struct device_node *capture) { struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai); + struct device *dev = rsnd_priv_to_dev(priv); struct device_node *np; int i; @@ -1169,7 +1170,11 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name, for_each_child_of_node(node, np) { struct rsnd_mod *mod; - i = rsnd_node_fixed_index(np, name, i); + i = rsnd_node_fixed_index(dev, np, name, i); + if (i < 0) { + of_node_put(np); + break; + } mod = mod_get(priv, i); @@ -1183,7 +1188,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name, of_node_put(node); } -int rsnd_node_fixed_index(struct device_node *node, char *name, int idx) +int rsnd_node_fixed_index(struct device *dev, struct device_node *node, char *name, int idx) { char node_name[16]; @@ -1210,6 +1215,8 @@ int rsnd_node_fixed_index(struct device_node *node, char *name, int idx) return idx; } + dev_err(dev, "strange node numbering (%s)", + of_node_full_name(node)); return -EINVAL; } @@ -1221,10 +1228,8 @@ int rsnd_node_count(struct rsnd_priv *priv, struct device_node *node, char *name i = 0; for_each_child_of_node(node, np) { - i = rsnd_node_fixed_index(np, name, i); + i = rsnd_node_fixed_index(dev, np, name, i); if (i < 0) { - dev_err(dev, "strange node numbering (%s)", - of_node_full_name(node)); of_node_put(np); return 0; } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 03e0d4eca7815..463ab237d7bd4 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -240,12 +240,19 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, struct dma_chan *rsnd_dma_request_channel(struct device_node *of_node, char *name, struct rsnd_mod *mod, char *x) { + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct device *dev = rsnd_priv_to_dev(priv); struct dma_chan *chan = NULL; struct device_node *np; int i = 0; for_each_child_of_node(of_node, np) { - i = rsnd_node_fixed_index(np, name, i); + i = rsnd_node_fixed_index(dev, np, name, i); + if (i < 0) { + chan = NULL; + of_node_put(np); + break; + } if (i == rsnd_mod_id_raw(mod) && (!chan)) chan = of_dma_request_slave_channel(np, x); diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 6580bab0e229b..d9cd190d7e198 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -460,7 +460,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name, struct device_node *playback, struct device_node *capture); int rsnd_node_count(struct rsnd_priv *priv, struct device_node *node, char *name); -int rsnd_node_fixed_index(struct device_node *node, char *name, int idx); +int rsnd_node_fixed_index(struct device *dev, struct device_node *node, char *name, int idx); int rsnd_channel_normalization(int chan); #define rsnd_runtime_channel_original(io) \ diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 42a100c6303d4..0ea84ae57c6ac 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -676,7 +676,12 @@ int rsnd_src_probe(struct rsnd_priv *priv) if (!of_device_is_available(np)) goto skip; - i = rsnd_node_fixed_index(np, SRC_NAME, i); + i = rsnd_node_fixed_index(dev, np, SRC_NAME, i); + if (i < 0) { + ret = -EINVAL; + of_node_put(np); + goto rsnd_src_probe_done; + } src = rsnd_src_get(priv, i); diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 87e606f688d3f..43c5e27dc5c86 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -1105,6 +1105,7 @@ void rsnd_parse_connect_ssi(struct rsnd_dai *rdai, struct device_node *capture) { struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai); + struct device *dev = rsnd_priv_to_dev(priv); struct device_node *node; struct device_node *np; int i; @@ -1117,7 +1118,11 @@ void rsnd_parse_connect_ssi(struct rsnd_dai *rdai, for_each_child_of_node(node, np) { struct rsnd_mod *mod; - i = rsnd_node_fixed_index(np, SSI_NAME, i); + i = rsnd_node_fixed_index(dev, np, SSI_NAME, i); + if (i < 0) { + of_node_put(np); + break; + } mod = rsnd_ssi_mod_get(priv, i); @@ -1182,7 +1187,12 @@ int rsnd_ssi_probe(struct rsnd_priv *priv) if (!of_device_is_available(np)) goto skip; - i = rsnd_node_fixed_index(np, SSI_NAME, i); + i = rsnd_node_fixed_index(dev, np, SSI_NAME, i); + if (i < 0) { + ret = -EINVAL; + of_node_put(np); + goto rsnd_ssi_probe_done; + } ssi = rsnd_ssi_get(priv, i); diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 138f95dd9f4af..4b8a63e336c77 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -462,6 +462,7 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai, struct device_node *capture) { struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai); + struct device *dev = rsnd_priv_to_dev(priv); struct device_node *node = rsnd_ssiu_of_node(priv); struct rsnd_dai_stream *io_p = &rdai->playback; struct rsnd_dai_stream *io_c = &rdai->capture; @@ -474,7 +475,11 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai, for_each_child_of_node(node, np) { struct rsnd_mod *mod; - i = rsnd_node_fixed_index(np, SSIU_NAME, i); + i = rsnd_node_fixed_index(dev, np, SSIU_NAME, i); + if (i < 0) { + of_node_put(np); + break; + } mod = rsnd_ssiu_mod_get(priv, i); -- GitLab From 84bf55461d2bc0a8196145f217bd4781f9d9ee19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibaut=20VAR=C3=88NE?= Date: Sun, 17 Apr 2022 16:51:45 +0200 Subject: [PATCH 0419/4335] ath9k: fix QCA9561 PA bias level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e999a5da28a0e0f7de242d841ef7d5e48f4646ae ] This patch fixes an invalid TX PA DC bias level on QCA9561, which results in a very low output power and very low throughput as devices are further away from the AP (compared to other 2.4GHz APs). This patch was suggested by Felix Fietkau, who noted[1]: "The value written to that register is wrong, because while the mask definition AR_CH0_TOP2_XPABIASLVL uses a different value for 9561, the shift definition AR_CH0_TOP2_XPABIASLVL_S is hardcoded to 12, which is wrong for 9561." In real life testing, without this patch the 2.4GHz throughput on Yuncore XD3200 is around 10Mbps sitting next to the AP, and closer to practical maximum with the patch applied. [1] https://lore.kernel.org/all/91c58969-c60e-2f41-00ac-737786d435ae@nbd.name Signed-off-by: Thibaut VARÈNE Acked-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220417145145.1847-1-hacks+kernel@slashdirt.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/ar9003_phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index a171dbb29fbb6..ad949eb02f3d2 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -720,7 +720,7 @@ #define AR_CH0_TOP2 (AR_SREV_9300(ah) ? 0x1628c : \ (AR_SREV_9462(ah) ? 0x16290 : 0x16284)) #define AR_CH0_TOP2_XPABIASLVL (AR_SREV_9561(ah) ? 0x1e00 : 0xf000) -#define AR_CH0_TOP2_XPABIASLVL_S 12 +#define AR_CH0_TOP2_XPABIASLVL_S (AR_SREV_9561(ah) ? 9 : 12) #define AR_CH0_XTAL (AR_SREV_9300(ah) ? 0x16294 : \ ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x16298 : \ -- GitLab From 9c385b961d4c378228e80f6abea8509cb67feab6 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 14 Jan 2022 11:02:26 +0000 Subject: [PATCH 0420/4335] media: venus: hfi: avoid null dereference in deinit [ Upstream commit 86594f6af867b5165d2ba7b5a71fae3a5961e56c ] If venus_probe fails at pm_runtime_put_sync the error handling first calls hfi_destroy and afterwards hfi_core_deinit. As hfi_destroy sets core->ops to NULL, hfi_core_deinit cannot call the core_deinit function anymore. Avoid this null pointer derefence by skipping the call when necessary. Signed-off-by: Luca Weiss Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/qcom/venus/hfi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c index 0f2482367e060..9bc4becdf6386 100644 --- a/drivers/media/platform/qcom/venus/hfi.c +++ b/drivers/media/platform/qcom/venus/hfi.c @@ -104,6 +104,9 @@ int hfi_core_deinit(struct venus_core *core, bool blocking) mutex_lock(&core->lock); } + if (!core->ops) + goto unlock; + ret = core->ops->core_deinit(core); if (!ret) -- GitLab From 98106f100f50c487469903b9cf6d966785fc9cc3 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Sun, 10 Apr 2022 08:34:41 +0100 Subject: [PATCH 0421/4335] media: pci: cx23885: Fix the error handling in cx23885_initdev() [ Upstream commit e8123311cf06d7dae71e8c5fe78e0510d20cd30b ] When the driver fails to call the dma_set_mask(), the driver will get the following splat: [ 55.853884] BUG: KASAN: use-after-free in __process_removed_driver+0x3c/0x240 [ 55.854486] Read of size 8 at addr ffff88810de60408 by task modprobe/590 [ 55.856822] Call Trace: [ 55.860327] __process_removed_driver+0x3c/0x240 [ 55.861347] bus_for_each_dev+0x102/0x160 [ 55.861681] i2c_del_driver+0x2f/0x50 This is because the driver has initialized the i2c related resources in cx23885_dev_setup() but not released them in error handling, fix this bug by modifying the error path that jumps after failing to call the dma_set_mask(). Signed-off-by: Zheyu Ma Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx23885/cx23885-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c index f8f2ff3b00c37..a07b18f2034e9 100644 --- a/drivers/media/pci/cx23885/cx23885-core.c +++ b/drivers/media/pci/cx23885/cx23885-core.c @@ -2165,7 +2165,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, err = dma_set_mask(&pci_dev->dev, 0xffffffff); if (err) { pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - goto fail_ctrl; + goto fail_dma_set_mask; } err = request_irq(pci_dev->irq, cx23885_irq, @@ -2173,7 +2173,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, if (err < 0) { pr_err("%s: can't get IRQ %d\n", dev->name, pci_dev->irq); - goto fail_irq; + goto fail_dma_set_mask; } switch (dev->board) { @@ -2195,7 +2195,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, return 0; -fail_irq: +fail_dma_set_mask: cx23885_dev_unregister(dev); fail_ctrl: v4l2_ctrl_handler_free(hdl); -- GitLab From 1f0fc1dfb5fdd456657519a97fab83691b96c6a0 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Sun, 10 Apr 2022 08:44:09 +0100 Subject: [PATCH 0422/4335] media: cx25821: Fix the warning when removing the module [ Upstream commit 2203436a4d24302871617373a7eb21bc17e38762 ] When removing the module, we will get the following warning: [ 14.746697] remove_proc_entry: removing non-empty directory 'irq/21', leaking at least 'cx25821[1]' [ 14.747449] WARNING: CPU: 4 PID: 368 at fs/proc/generic.c:717 remove_proc_entry+0x389/0x3f0 [ 14.751611] RIP: 0010:remove_proc_entry+0x389/0x3f0 [ 14.759589] Call Trace: [ 14.759792] [ 14.759975] unregister_irq_proc+0x14c/0x170 [ 14.760340] irq_free_descs+0x94/0xe0 [ 14.760640] mp_unmap_irq+0xb6/0x100 [ 14.760937] acpi_unregister_gsi_ioapic+0x27/0x40 [ 14.761334] acpi_pci_irq_disable+0x1d3/0x320 [ 14.761688] pci_disable_device+0x1ad/0x380 [ 14.762027] ? _raw_spin_unlock_irqrestore+0x2d/0x60 [ 14.762442] ? cx25821_shutdown+0x20/0x9f0 [cx25821] [ 14.762848] cx25821_finidev+0x48/0xc0 [cx25821] [ 14.763242] pci_device_remove+0x92/0x240 Fix this by freeing the irq before call pci_disable_device(). Signed-off-by: Zheyu Ma Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx25821/cx25821-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index 40c10ca94defe..a4192e80e9a07 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -1339,11 +1339,11 @@ static void cx25821_finidev(struct pci_dev *pci_dev) struct cx25821_dev *dev = get_cx25821(v4l2_dev); cx25821_shutdown(dev); - pci_disable_device(pci_dev); /* unregister stuff */ if (pci_dev->irq) free_irq(pci_dev->irq, dev); + pci_disable_device(pci_dev); cx25821_dev_unregister(dev); v4l2_device_unregister(v4l2_dev); -- GitLab From 27f672af28a8e9b783ff7f0eaf7ef2fbd5a2f4ba Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Fri, 1 Apr 2022 10:13:16 +0800 Subject: [PATCH 0423/4335] md/bitmap: don't set sb values if can't pass sanity check [ Upstream commit e68cb83a57a458b01c9739e2ad9cb70b04d1e6d2 ] If bitmap area contains invalid data, kernel will crash then mdadm triggers "Segmentation fault". This is cluster-md speical bug. In non-clustered env, mdadm will handle broken metadata case. In clustered array, only kernel space handles bitmap slot info. But even this bug only happened in clustered env, current sanity check is wrong, the code should be changed. How to trigger: (faulty injection) dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sda dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdb mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb mdadm -Ss echo aaa > magic.txt == below modifying slot 2 bitmap data == dd if=magic.txt of=/dev/sda seek=16384 bs=1 count=3 <== destroy magic dd if=/dev/zero of=/dev/sda seek=16436 bs=1 count=4 <== ZERO chunksize mdadm -A /dev/md0 /dev/sda /dev/sdb == kernel crashes. mdadm outputs "Segmentation fault" == Reason of kernel crash: In md_bitmap_read_sb (called by md_bitmap_create), bad bitmap magic didn't block chunksize assignment, and zero value made DIV_ROUND_UP_SECTOR_T() trigger "divide error". Crash log: kernel: md: md0 stopped. kernel: md/raid1:md0: not clean -- starting background reconstruction kernel: md/raid1:md0: active with 2 out of 2 mirrors kernel: dlm: ... ... kernel: md-cluster: Joined cluster 44810aba-38bb-e6b8-daca-bc97a0b254aa slot 1 kernel: md0: invalid bitmap file superblock: bad magic kernel: md_bitmap_copy_from_slot can't get bitmap from slot 2 kernel: md-cluster: Could not gather bitmaps from slot 2 kernel: divide error: 0000 [#1] SMP NOPTI kernel: CPU: 0 PID: 1603 Comm: mdadm Not tainted 5.14.6-1-default kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod] kernel: RSP: 0018:ffffc22ac0843ba0 EFLAGS: 00010246 kernel: ... ... kernel: Call Trace: kernel: ? dlm_lock_sync+0xd0/0xd0 [md_cluster 77fe..7a0] kernel: md_bitmap_copy_from_slot+0x2c/0x290 [md_mod 24ea..d3a] kernel: load_bitmaps+0xec/0x210 [md_cluster 77fe..7a0] kernel: md_bitmap_load+0x81/0x1e0 [md_mod 24ea..d3a] kernel: do_md_run+0x30/0x100 [md_mod 24ea..d3a] kernel: md_ioctl+0x1290/0x15a0 [md_mod 24ea....d3a] kernel: ? mddev_unlock+0xaa/0x130 [md_mod 24ea..d3a] kernel: ? blkdev_ioctl+0xb1/0x2b0 kernel: block_ioctl+0x3b/0x40 kernel: __x64_sys_ioctl+0x7f/0xb0 kernel: do_syscall_64+0x59/0x80 kernel: ? exit_to_user_mode_prepare+0x1ab/0x230 kernel: ? syscall_exit_to_user_mode+0x18/0x40 kernel: ? do_syscall_64+0x69/0x80 kernel: entry_SYSCALL_64_after_hwframe+0x44/0xae kernel: RIP: 0033:0x7f4a15fa722b kernel: ... ... kernel: ---[ end trace 8afa7612f559c868 ]--- kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod] Reported-by: kernel test robot Reported-by: Dan Carpenter Acked-by: Guoqing Jiang Signed-off-by: Heming Zhao Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md-bitmap.c | 44 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index e29c6298ef5c9..8cc11b1987ec8 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -639,14 +639,6 @@ re_read: daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - /* Setup nodes/clustername only if bitmap version is - * cluster-compatible - */ - if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, - sb->cluster_name, 64); - } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) @@ -668,6 +660,16 @@ re_read: goto out; } + /* + * Setup nodes/clustername only if bitmap version is + * cluster-compatible + */ + if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } + /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); @@ -700,9 +702,9 @@ re_read: out: kunmap_atomic(sb); - /* Assigning chunksize is required for "re_read" */ - bitmap->mddev->bitmap_info.chunksize = chunksize; if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { + /* Assigning chunksize is required for "re_read" */ + bitmap->mddev->bitmap_info.chunksize = chunksize; err = md_setup_cluster(bitmap->mddev, nodes); if (err) { pr_warn("%s: Could not setup cluster service (%d)\n", @@ -713,18 +715,18 @@ out: goto re_read; } - out_no_sb: - if (test_bit(BITMAP_STALE, &bitmap->flags)) - bitmap->events_cleared = bitmap->mddev->events; - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; - bitmap->mddev->bitmap_info.nodes = nodes; - if (bitmap->mddev->bitmap_info.space == 0 || - bitmap->mddev->bitmap_info.space > sectors_reserved) - bitmap->mddev->bitmap_info.space = sectors_reserved; - if (err) { + if (err == 0) { + if (test_bit(BITMAP_STALE, &bitmap->flags)) + bitmap->events_cleared = bitmap->mddev->events; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; + bitmap->mddev->bitmap_info.nodes = nodes; + if (bitmap->mddev->bitmap_info.space == 0 || + bitmap->mddev->bitmap_info.space > sectors_reserved) + bitmap->mddev->bitmap_info.space = sectors_reserved; + } else { md_bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); -- GitLab From 353298cadbd4c7d8e8a16d6000066414694933c3 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Mon, 11 Apr 2022 16:37:53 +0100 Subject: [PATCH 0424/4335] mmc: jz4740: Apply DMA engine limits to maximum segment size [ Upstream commit afadb04f1d6e74b18a253403f5274cde5e3fd7bd ] Do what is done in other DMA-enabled MMC host drivers (cf. host/mmci.c) and limit the maximum segment size based on the DMA engine's capabilities. This is needed to avoid warnings like the following with CONFIG_DMA_API_DEBUG=y. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 21 at kernel/dma/debug.c:1162 debug_dma_map_sg+0x2f4/0x39c DMA-API: jz4780-dma 13420000.dma-controller: mapping sg segment longer than device claims to support [len=98304] [max=65536] CPU: 0 PID: 21 Comm: kworker/0:1H Not tainted 5.18.0-rc1 #19 Workqueue: kblockd blk_mq_run_work_fn Stack : 81575aec 00000004 80620000 80620000 80620000 805e7358 00000009 801537ac 814c832c 806276e3 806e34b4 80620000 81575aec 00000001 81575ab8 09291444 00000000 00000000 805e7358 81575958 ffffffea 8157596c 00000000 636f6c62 6220646b 80387a70 0000000f 6d5f6b6c 80620000 00000000 81575ba4 00000009 805e170c 80896640 00000001 00010000 00000000 00000000 00006098 806e0000 ... Call Trace: [<80107670>] show_stack+0x84/0x120 [<80528cd8>] __warn+0xb8/0xec [<80528d78>] warn_slowpath_fmt+0x6c/0xb8 [<8016f1d4>] debug_dma_map_sg+0x2f4/0x39c [<80169d4c>] __dma_map_sg_attrs+0xf0/0x118 [<8016a27c>] dma_map_sg_attrs+0x14/0x28 [<804f66b4>] jz4740_mmc_prepare_dma_data+0x74/0xa4 [<804f6714>] jz4740_mmc_pre_request+0x30/0x54 [<804f4ff4>] mmc_blk_mq_issue_rq+0x6e0/0x7bc [<804f5590>] mmc_mq_queue_rq+0x220/0x2d4 [<8038b2c0>] blk_mq_dispatch_rq_list+0x480/0x664 [<80391040>] blk_mq_do_dispatch_sched+0x2dc/0x370 [<80391468>] __blk_mq_sched_dispatch_requests+0xec/0x164 [<80391540>] blk_mq_sched_dispatch_requests+0x44/0x94 [<80387900>] __blk_mq_run_hw_queue+0xb0/0xcc [<80134c14>] process_one_work+0x1b8/0x264 [<80134ff8>] worker_thread+0x2ec/0x3b8 [<8013b13c>] kthread+0x104/0x10c [<80101dcc>] ret_from_kernel_thread+0x14/0x1c ---[ end trace 0000000000000000 ]--- Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220411153753.50443-1-aidanmacdonald.0x0@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/jz4740_mmc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 80a2c270d502e..3c59dec08c3bd 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -235,6 +235,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host) return PTR_ERR(host->dma_rx); } + /* + * Limit the maximum segment size in any SG entry according to + * the parameters of the DMA engine device. + */ + if (host->dma_tx) { + struct device *dev = host->dma_tx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + + if (host->dma_rx) { + struct device *dev = host->dma_rx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + return 0; } -- GitLab From 7eb8e4787b43f499d91f3e4e9dcf905e68b0a312 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Mon, 25 Apr 2022 12:01:20 +0530 Subject: [PATCH 0425/4335] drivers: mmc: sdhci_am654: Add the quirk to set TESTCD bit [ Upstream commit c7666240ec76422cb7546bd07cc8ae80dc0ccdd2 ] The ARASAN MMC controller on Keystone 3 class of devices need the SDCD line to be connected for proper functioning. Similar to the issue pointed out in sdhci-of-arasan.c driver, commit 3794c542641f ("mmc: sdhci-of-arasan: Set controller to test mode when no CD bit"). In cases where this can't be connected, add a quirk to force the controller into test mode and set the TESTCD bit. Use the flag "ti,fails-without-test-cd", to implement this above quirk when required. Signed-off-by: Vignesh Raghavendra Signed-off-by: Aswath Govindraju Link: https://lore.kernel.org/r/20220425063120.10135-3-a-govindraju@ti.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci_am654.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index b4891bb266485..a3e62e212631f 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -147,6 +147,9 @@ struct sdhci_am654_data { int drv_strength; int strb_sel; u32 flags; + u32 quirks; + +#define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) }; struct sdhci_am654_driver_data { @@ -369,6 +372,21 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) } } +static void sdhci_am654_reset(struct sdhci_host *host, u8 mask) +{ + u8 ctrl; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + + sdhci_reset(host, mask); + + if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + ctrl |= SDHCI_CTRL_CDTEST_INS | SDHCI_CTRL_CDTEST_EN; + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + } +} + static int sdhci_am654_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); @@ -500,7 +518,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = { .set_clock = sdhci_j721e_4bit_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_am654_reset, }; static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = { @@ -719,6 +737,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, device_property_read_u32(dev, "ti,clkbuf-sel", &sdhci_am654->clkbuf_sel); + if (device_property_read_bool(dev, "ti,fails-without-test-cd")) + sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST; + sdhci_get_of_property(pdev); return 0; -- GitLab From 942ce0cba14c907eefb954b84045df220c88ebd6 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Mon, 18 Apr 2022 10:57:55 +0000 Subject: [PATCH 0426/4335] scsi: megaraid: Fix error check return value of register_chrdev() [ Upstream commit c5acd61dbb32b6bda0f3a354108f2b8dcb788985 ] If major equals 0, register_chrdev() returns an error code when it fails. This function dynamically allocates a major and returns its number on success, so we should use "< 0" to check it instead of "!". Link: https://lore.kernel.org/r/20220418105755.2558828-1-lv.ruyi@zte.com.cn Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 56910e94dbf2a..7dd6dd74d2bc2 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4628,7 +4628,7 @@ static int __init megaraid_init(void) * major number allocation. */ major = register_chrdev(0, "megadev_legacy", &megadev_fops); - if (!major) { + if (major < 0) { printk(KERN_WARNING "megaraid: failed to register char device\n"); } -- GitLab From 7719a8044bf66b9f59818e50b0c7877e1397e7f4 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Mon, 25 Apr 2022 20:41:38 +0800 Subject: [PATCH 0427/4335] drm/amdgpu/sdma: Fix incorrect calculations of the wptr of the doorbells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7dba6e838e741caadcf27ef717b6dcb561e77f89 ] This patch fixes the issue where the driver miscomputes the 64-bit values of the wptr of the SDMA doorbell when initializing the hardware. SDMA engines v4 and later on have full 64-bit registers for wptr thus they should be set properly. Older generation hardwares like CIK / SI have only 16 / 20 / 24bits for the WPTR, where the calls of lower_32_bits() will be removed in a following patch. Reviewed-by: Christian König Signed-off-by: Haohui Mai Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e37948c157692..9014f71d52ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -770,8 +770,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 50bf3b71bc93c..0f75864365d61 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -400,8 +400,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -782,9 +782,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr) << 2); + lower_32_bits(ring->wptr << 2)); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr) << 2); + upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index e32efcfb0c8b1..f643b977b5f4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -287,8 +287,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -660,8 +660,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); -- GitLab From 3ab08d7c16531311f80a70e0637e60429af9710f Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Wed, 20 Apr 2022 09:03:52 +0000 Subject: [PATCH 0428/4335] scsi: ufs: Use pm_runtime_resume_and_get() instead of pm_runtime_get_sync() [ Upstream commit 75b8715e20a20bc7b4844835e4035543a2674200 ] Using pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and pm_runtime_put_noidle(). This change is just to simplify the code, no actual functional changes. Link: https://lore.kernel.org/r/20220420090353.2588804-1-chi.minghao@zte.com.cn Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ti-j721e-ufs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ufs/ti-j721e-ufs.c b/drivers/scsi/ufs/ti-j721e-ufs.c index eafe0db98d542..122d650d08102 100644 --- a/drivers/scsi/ufs/ti-j721e-ufs.c +++ b/drivers/scsi/ufs/ti-j721e-ufs.c @@ -29,11 +29,9 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev) return PTR_ERR(regbase); pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - pm_runtime_put_noidle(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) goto disable_pm; - } /* Select MPHY refclk frequency */ clk = devm_clk_get(dev, NULL); -- GitLab From 40cf4ea4d2d497f7732c87d350ba5c3f5e8a43a1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 26 Apr 2022 11:14:19 -0700 Subject: [PATCH 0429/4335] scsi: lpfc: Fix resource leak in lpfc_sli4_send_seq_to_ulp() [ Upstream commit 646db1a560f44236b7278b822ca99a1d3b6ea72c ] If no handler is found in lpfc_complete_unsol_iocb() to match the rctl of a received frame, the frame is dropped and resources are leaked. Fix by returning resources when discarding an unhandled frame type. Update lpfc_fc_frame_check() handling of NOP basic link service. Link: https://lore.kernel.org/r/20220426181419.9154-1-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_sli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 68d8e55c1205c..d8d26cde70b65 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -18455,7 +18455,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_RCTL_ELS_REP: /* extended link services reply */ case FC_RCTL_ELS4_REQ: /* FC-4 ELS request */ case FC_RCTL_ELS4_REP: /* FC-4 ELS reply */ - case FC_RCTL_BA_NOP: /* basic link service NOP */ case FC_RCTL_BA_ABTS: /* basic link service abort */ case FC_RCTL_BA_RMC: /* remove connection */ case FC_RCTL_BA_ACC: /* basic accept */ @@ -18476,6 +18475,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) fc_vft_hdr = (struct fc_vft_header *)fc_hdr; fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1]; return lpfc_fc_frame_check(phba, fc_hdr); + case FC_RCTL_BA_NOP: /* basic link service NOP */ default: goto drop; } @@ -19288,12 +19288,14 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, if (!lpfc_complete_unsol_iocb(phba, phba->sli4_hba.els_wq->pring, iocbq, fc_hdr->fh_r_ctl, - fc_hdr->fh_type)) + fc_hdr->fh_type)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2540 Ring %d handler: unexpected Rctl " "x%x Type x%x received\n", LPFC_ELS_RING, fc_hdr->fh_r_ctl, fc_hdr->fh_type); + lpfc_in_buf_free(phba, &seq_dmabuf->dbuf); + } /* Free iocb created in lpfc_prep_seq */ list_for_each_entry_safe(curr_iocb, next_iocb, -- GitLab From 451b9076903a057b7b8d5b24dc84b3e436a1c743 Mon Sep 17 00:00:00 2001 From: Hari Chandrakanthan Date: Sat, 23 Apr 2022 12:36:47 +0300 Subject: [PATCH 0430/4335] ath11k: disable spectral scan during spectral deinit [ Upstream commit 161c64de239c7018e0295e7e0520a19f00aa32dc ] When ath11k modules are removed using rmmod with spectral scan enabled, crash is observed. Different crash trace is observed for each crash. Send spectral scan disable WMI command to firmware before cleaning the spectral dbring in the spectral_deinit API to avoid this crash. call trace from one of the crash observed: [ 1252.880802] Unable to handle kernel NULL pointer dereference at virtual address 00000008 [ 1252.882722] pgd = 0f42e886 [ 1252.890955] [00000008] *pgd=00000000 [ 1252.893478] Internal error: Oops: 5 [#1] PREEMPT SMP ARM [ 1253.093035] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.89 #0 [ 1253.115261] Hardware name: Generic DT based system [ 1253.121149] PC is at ath11k_spectral_process_data+0x434/0x574 [ath11k] [ 1253.125940] LR is at 0x88e31017 [ 1253.132448] pc : [<7f9387b8>] lr : [<88e31017>] psr: a0000193 [ 1253.135488] sp : 80d01bc8 ip : 00000001 fp : 970e0000 [ 1253.141737] r10: 88e31000 r9 : 970ec000 r8 : 00000080 [ 1253.146946] r7 : 94734040 r6 : a0000113 r5 : 00000057 r4 : 00000000 [ 1253.152159] r3 : e18cb694 r2 : 00000217 r1 : 1df1f000 r0 : 00000001 [ 1253.158755] Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 1253.165266] Control: 10c0383d Table: 5e71006a DAC: 00000055 [ 1253.172472] Process swapper/0 (pid: 0, stack limit = 0x60870141) [ 1253.458055] [<7f9387b8>] (ath11k_spectral_process_data [ath11k]) from [<7f917fdc>] (ath11k_dbring_buffer_release_event+0x214/0x2e4 [ath11k]) [ 1253.466139] [<7f917fdc>] (ath11k_dbring_buffer_release_event [ath11k]) from [<7f8ea3c4>] (ath11k_wmi_tlv_op_rx+0x1840/0x29cc [ath11k]) [ 1253.478807] [<7f8ea3c4>] (ath11k_wmi_tlv_op_rx [ath11k]) from [<7f8fe868>] (ath11k_htc_rx_completion_handler+0x180/0x4e0 [ath11k]) [ 1253.490699] [<7f8fe868>] (ath11k_htc_rx_completion_handler [ath11k]) from [<7f91308c>] (ath11k_ce_per_engine_service+0x2c4/0x3b4 [ath11k]) [ 1253.502386] [<7f91308c>] (ath11k_ce_per_engine_service [ath11k]) from [<7f9a4198>] (ath11k_pci_ce_tasklet+0x28/0x80 [ath11k_pci]) [ 1253.514811] [<7f9a4198>] (ath11k_pci_ce_tasklet [ath11k_pci]) from [<8032227c>] (tasklet_action_common.constprop.2+0x64/0xe8) [ 1253.526476] [<8032227c>] (tasklet_action_common.constprop.2) from [<803021e8>] (__do_softirq+0x130/0x2d0) [ 1253.537756] [<803021e8>] (__do_softirq) from [<80322610>] (irq_exit+0xcc/0xe8) [ 1253.547304] [<80322610>] (irq_exit) from [<8036a4a4>] (__handle_domain_irq+0x60/0xb4) [ 1253.554428] [<8036a4a4>] (__handle_domain_irq) from [<805eb348>] (gic_handle_irq+0x4c/0x90) [ 1253.562321] [<805eb348>] (gic_handle_irq) from [<80301a78>] (__irq_svc+0x58/0x8c) Tested-on: QCN6122 hw1.0 AHB WLAN.HK.2.6.0.1-00851-QCAHKSWPL_SILICONZ-1 Signed-off-by: Hari Chandrakanthan Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1649396345-349-1-git-send-email-quic_haric@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/spectral.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/spectral.c b/drivers/net/wireless/ath/ath11k/spectral.c index 1afe677596594..e5af9358e6105 100644 --- a/drivers/net/wireless/ath/ath11k/spectral.c +++ b/drivers/net/wireless/ath/ath11k/spectral.c @@ -214,7 +214,10 @@ static int ath11k_spectral_scan_config(struct ath11k *ar, return -ENODEV; arvif->spectral_enabled = (mode != ATH11K_SPECTRAL_DISABLED); + + spin_lock_bh(&ar->spectral.lock); ar->spectral.mode = mode; + spin_unlock_bh(&ar->spectral.lock); ret = ath11k_wmi_vdev_spectral_enable(ar, arvif->vdev_id, ATH11K_WMI_SPECTRAL_TRIGGER_CMD_CLEAR, @@ -829,9 +832,6 @@ static inline void ath11k_spectral_ring_free(struct ath11k *ar) { struct ath11k_spectral *sp = &ar->spectral; - if (!sp->enabled) - return; - ath11k_dbring_srng_cleanup(ar, &sp->rx_ring); ath11k_dbring_buf_cleanup(ar, &sp->rx_ring); } @@ -883,15 +883,16 @@ void ath11k_spectral_deinit(struct ath11k_base *ab) if (!sp->enabled) continue; - ath11k_spectral_debug_unregister(ar); - ath11k_spectral_ring_free(ar); + mutex_lock(&ar->conf_mutex); + ath11k_spectral_scan_config(ar, ATH11K_SPECTRAL_DISABLED); + mutex_unlock(&ar->conf_mutex); spin_lock_bh(&sp->lock); - - sp->mode = ATH11K_SPECTRAL_DISABLED; sp->enabled = false; - spin_unlock_bh(&sp->lock); + + ath11k_spectral_debug_unregister(ar); + ath11k_spectral_ring_free(ar); } } -- GitLab From 47187f6177917806ebb256a6b32f8de1769fbade Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 27 Apr 2022 15:49:18 +0200 Subject: [PATCH 0431/4335] ASoC: Intel: bytcr_rt5640: Add quirk for the HP Pro Tablet 408 [ Upstream commit ce216cfa84a4e1c23b105e652c550bdeaac9e922 ] Add a quirk for the HP Pro Tablet 408, this BYTCR tablet has no CHAN package in its ACPI tables and uses SSP0-AIF1 rather then SSP0-AIF2 which is the default for BYTCR devices. It also uses DMIC1 for the internal mic rather then the default IN3 and it uses JD2 rather then the default JD1 for jack-detect. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211485 Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220427134918.527381-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index a6e837290c7dc..f9c82ebc552cf 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -759,6 +759,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_MCLK_EN), }, + { /* HP Pro Tablet 408 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pro Tablet 408"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* HP Stream 7 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), -- GitLab From 978e3d023256bfaf34a0033d40c94e8a8e70cf3c Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 3 Dec 2021 10:28:15 +0000 Subject: [PATCH 0432/4335] drm/plane: Move range check for format_count earlier [ Upstream commit 4b674dd69701c2e22e8e7770c1706a69f3b17269 ] While the check for format_count > 64 in __drm_universal_plane_init() shouldn't be hit (it's a WARN_ON), in its current position it will then leak the plane->format_types array and fail to call drm_mode_object_unregister() leaking the modeset identifier. Move it to the start of the function to avoid allocating those resources in the first place. Signed-off-by: Steven Price Signed-off-by: Liviu Dudau Link: https://lore.kernel.org/dri-devel/20211203102815.38624-1-steven.price@arm.com/ Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_plane.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 82afb854141b2..fd0bf90fb4c28 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -249,6 +249,13 @@ static int __drm_universal_plane_init(struct drm_device *dev, if (WARN_ON(config->num_total_plane >= 32)) return -EINVAL; + /* + * First driver to need more than 64 formats needs to fix this. Each + * format is encoded as a bit and the current code only supports a u64. + */ + if (WARN_ON(format_count > 64)) + return -EINVAL; + WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); @@ -270,13 +277,6 @@ static int __drm_universal_plane_init(struct drm_device *dev, return -ENOMEM; } - /* - * First driver to need more than 64 formats needs to fix this. Each - * format is encoded as a bit and the current code only supports a u64. - */ - if (WARN_ON(format_count > 64)) - return -EINVAL; - if (format_modifiers) { const uint64_t *temp_modifiers = format_modifiers; -- GitLab From ae488dafe074e420ccf361b2633f7fbc314eaedf Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 25 Apr 2022 10:16:46 +0800 Subject: [PATCH 0433/4335] drm/amd/pm: fix the compile warning [ Upstream commit 555238d92ac32dbad2d77ad2bafc48d17391990c ] Fix the compile warning below: drivers/gpu/drm/amd/amdgpu/../pm/legacy-dpm/kv_dpm.c:1641 kv_get_acp_boot_level() warn: always true condition '(table->entries[i]->clk >= 0) => (0-u32max >= 0)' Reported-by: kernel test robot CC: Alex Deucher Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c index bcae42cef3743..6ba4c2ae69a63 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c @@ -1609,19 +1609,7 @@ static int kv_update_samu_dpm(struct amdgpu_device *adev, bool gate) static u8 kv_get_acp_boot_level(struct amdgpu_device *adev) { - u8 i; - struct amdgpu_clock_voltage_dependency_table *table = - &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; - - for (i = 0; i < table->count; i++) { - if (table->entries[i].clk >= 0) /* XXX */ - break; - } - - if (i >= table->count) - i = table->count - 1; - - return i; + return 0; } static void kv_update_acp_boot_level(struct amdgpu_device *adev) -- GitLab From c2272428090d0d215a3f017cbbbad731c07eee53 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 27 Apr 2022 10:37:33 +0300 Subject: [PATCH 0434/4335] ath10k: skip ath10k_halt during suspend for driver state RESTARTING [ Upstream commit b72a4aff947ba807177bdabb43debaf2c66bee05 ] Double free crash is observed when FW recovery(caused by wmi timeout/crash) is followed by immediate suspend event. The FW recovery is triggered by ath10k_core_restart() which calls driver clean up via ath10k_halt(). When the suspend event occurs between the FW recovery, the restart worker thread is put into frozen state until suspend completes. The suspend event triggers ath10k_stop() which again triggers ath10k_halt() The double invocation of ath10k_halt() causes ath10k_htt_rx_free() to be called twice(Note: ath10k_htt_rx_alloc was not called by restart worker thread because of its frozen state), causing the crash. To fix this, during the suspend flow, skip call to ath10k_halt() in ath10k_stop() when the current driver state is ATH10K_STATE_RESTARTING. Also, for driver state ATH10K_STATE_RESTARTING, call ath10k_wait_for_suspend() in ath10k_stop(). This is because call to ath10k_wait_for_suspend() is skipped later in [ath10k_halt() > ath10k_core_stop()] for the driver state ATH10K_STATE_RESTARTING. The frozen restart worker thread will be cancelled during resume when the device comes out of suspend. Below is the crash stack for reference: [ 428.469167] ------------[ cut here ]------------ [ 428.469180] kernel BUG at mm/slub.c:4150! [ 428.469193] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 428.469219] Workqueue: events_unbound async_run_entry_fn [ 428.469230] RIP: 0010:kfree+0x319/0x31b [ 428.469241] RSP: 0018:ffffa1fac015fc30 EFLAGS: 00010246 [ 428.469247] RAX: ffffedb10419d108 RBX: ffff8c05262b0000 [ 428.469252] RDX: ffff8c04a8c07000 RSI: 0000000000000000 [ 428.469256] RBP: ffffa1fac015fc78 R08: 0000000000000000 [ 428.469276] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 428.469285] Call Trace: [ 428.469295] ? dma_free_attrs+0x5f/0x7d [ 428.469320] ath10k_core_stop+0x5b/0x6f [ 428.469336] ath10k_halt+0x126/0x177 [ 428.469352] ath10k_stop+0x41/0x7e [ 428.469387] drv_stop+0x88/0x10e [ 428.469410] __ieee80211_suspend+0x297/0x411 [ 428.469441] rdev_suspend+0x6e/0xd0 [ 428.469462] wiphy_suspend+0xb1/0x105 [ 428.469483] ? name_show+0x2d/0x2d [ 428.469490] dpm_run_callback+0x8c/0x126 [ 428.469511] ? name_show+0x2d/0x2d [ 428.469517] __device_suspend+0x2e7/0x41b [ 428.469523] async_suspend+0x1f/0x93 [ 428.469529] async_run_entry_fn+0x3d/0xd1 [ 428.469535] process_one_work+0x1b1/0x329 [ 428.469541] worker_thread+0x213/0x372 [ 428.469547] kthread+0x150/0x15f [ 428.469552] ? pr_cont_work+0x58/0x58 [ 428.469558] ? kthread_blkcg+0x31/0x31 Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00288-QCARMSWPZ-1 Co-developed-by: Wen Gong Signed-off-by: Wen Gong Signed-off-by: Abhishek Kumar Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220426221859.v2.1.I650b809482e1af8d0156ed88b5dc2677a0711d46@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 1f73fbfee0c06..8a80919b627f4 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5339,13 +5339,29 @@ err: static void ath10k_stop(struct ieee80211_hw *hw) { struct ath10k *ar = hw->priv; + u32 opt; ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); if (ar->state != ATH10K_STATE_OFF) { - if (!ar->hw_rfkill_on) - ath10k_halt(ar); + if (!ar->hw_rfkill_on) { + /* If the current driver state is RESTARTING but not yet + * fully RESTARTED because of incoming suspend event, + * then ath10k_halt() is already called via + * ath10k_core_restart() and should not be called here. + */ + if (ar->state != ATH10K_STATE_RESTARTING) { + ath10k_halt(ar); + } else { + /* Suspending here, because when in RESTARTING + * state, ath10k_core_stop() skips + * ath10k_wait_for_suspend(). + */ + opt = WMI_PDEV_SUSPEND_AND_DISABLE_INTR; + ath10k_wait_for_suspend(ar, opt); + } + } ar->state = ATH10K_STATE_OFF; } mutex_unlock(&ar->conf_mutex); -- GitLab From 621916afe8cd4f322eb12759b64a2f938d4e551d Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:41 +0100 Subject: [PATCH 0435/4335] arm64: compat: Do not treat syscall number as ESR_ELx for a bad syscall [ Upstream commit 3fed9e551417b84038b15117732ea4505eee386b ] If a compat process tries to execute an unknown system call above the __ARM_NR_COMPAT_END number, the kernel sends a SIGILL signal to the offending process. Information about the error is printed to dmesg in compat_arm_syscall() -> arm64_notify_die() -> arm64_force_sig_fault() -> arm64_show_signal(). arm64_show_signal() interprets a non-zero value for current->thread.fault_code as an exception syndrome and displays the message associated with the ESR_ELx.EC field (bits 31:26). current->thread.fault_code is set in compat_arm_syscall() -> arm64_notify_die() with the bad syscall number instead of a valid ESR_ELx value. This means that the ESR_ELx.EC field has the value that the user set for the syscall number and the kernel can end up printing bogus exception messages*. For example, for the syscall number 0x68000000, which evaluates to ESR_ELx.EC value of 0x1A (ESR_ELx_EC_FPAC) the kernel prints this error: [ 18.349161] syscall[300]: unhandled exception: ERET/ERETAA/ERETAB, ESR 0x68000000, Oops - bad compat syscall(2) in syscall[10000+50000] [ 18.350639] CPU: 2 PID: 300 Comm: syscall Not tainted 5.18.0-rc1 #79 [ 18.351249] Hardware name: Pine64 RockPro64 v2.0 (DT) [..] which is misleading, as the bad compat syscall has nothing to do with pointer authentication. Stop arm64_show_signal() from printing exception syndrome information by having compat_arm_syscall() set the ESR_ELx value to 0, as it has no meaning for an invalid system call number. The example above now becomes: [ 19.935275] syscall[301]: unhandled exception: Oops - bad compat syscall(2) in syscall[10000+50000] [ 19.936124] CPU: 1 PID: 301 Comm: syscall Not tainted 5.18.0-rc1-00005-g7e08006d4102 #80 [ 19.936894] Hardware name: Pine64 RockPro64 v2.0 (DT) [..] which although shows less information because the syscall number, wrongfully advertised as the ESR value, is missing, it is better than showing plainly wrong information. The syscall number can be easily obtained with strace. *A 32-bit value above or equal to 0x8000_0000 is interpreted as a negative integer in compat_arm_syscal() and the condition scno < __ARM_NR_COMPAT_END evaluates to true; the syscall will exit to userspace in this case with the ENOSYS error code instead of arm64_notify_die() being called. Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-3-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/sys_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index db5159a3055fc..b88a52f7188fc 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -114,6 +114,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } -- GitLab From 903d01a220ef297f207e338f3aa9df353ea88583 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Sun, 24 Apr 2022 03:19:59 +0000 Subject: [PATCH 0436/4335] drm: msm: fix error check return value of irq_of_parse_and_map() [ Upstream commit b9e4f1d2b505df8e2439b63e67afaa287c1c43e2 ] The irq_of_parse_and_map() function returns 0 on failure, and does not return an negative value. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/483175/ Link: https://lore.kernel.org/r/20220424031959.3172406-1-lv.ruyi@zte.com.cn Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index b3b42672b2d47..a2b276ae96733 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -598,9 +598,9 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) } irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (irq < 0) { - ret = irq; - DRM_DEV_ERROR(&pdev->dev, "failed to get irq: %d\n", ret); + if (!irq) { + ret = -EINVAL; + DRM_DEV_ERROR(&pdev->dev, "failed to get irq\n"); goto fail; } -- GitLab From 04be468ec13f4c9e4091c922c83ee920964e17df Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 21 Apr 2022 10:37:35 +0800 Subject: [PATCH 0437/4335] scsi: target: tcmu: Fix possible data corruption [ Upstream commit bb9b9eb0ae2e9d3f6036f0ad907c3a83dcd43485 ] When tcmu_vma_fault() gets a page successfully, before the current context completes page fault procedure, find_free_blocks() may run and call unmap_mapping_range() to unmap the page. Assume that when find_free_blocks() initially completes and the previous page fault procedure starts to run again and completes, then one truncated page has been mapped to userspace. But note that tcmu_vma_fault() has gotten a refcount for the page so any other subsystem won't be able to use the page unless the userspace address is unmapped later. If another command subsequently runs and needs to extend dbi_thresh it may reuse the corresponding slot for the previous page in data_bitmap. Then though we'll allocate new page for this slot in data_area, no page fault will happen because we have a valid map and the real request's data will be lost. Filesystem implementations will also run into this issue but they usually lock the page when vm_operations_struct->fault gets a page and unlock the page after finish_fault() completes. For truncate filesystems lock pages in truncate_inode_pages() to protect against racing wrt. page faults. To fix this possible data corruption scenario we can apply a method similar to the filesystems. For pages that are to be freed, tcmu_blocks_release() locks and unlocks. Make tcmu_vma_fault() also lock found page under cmdr_lock. At the same time, since tcmu_vma_fault() gets an extra page refcount, tcmu_blocks_release() won't free pages if pages are in page fault procedure, which means it is safe to call tcmu_blocks_release() before unmap_mapping_range(). With these changes tcmu_blocks_release() will wait for all page faults to be completed before calling unmap_mapping_range(). And later, if unmap_mapping_range() is called, it will ensure stale mappings are removed. Link: https://lore.kernel.org/r/20220421023735.9018-1-xiaoguang.wang@linux.alibaba.com Reviewed-by: Bodo Stroesser Signed-off-by: Xiaoguang Wang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_user.c | 40 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0ca5ec14d3dbe..0173f44b015a7 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1667,6 +1668,26 @@ static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first, xas_lock(&xas); xas_for_each(&xas, page, (last + 1) * udev->data_pages_per_blk - 1) { xas_store(&xas, NULL); + /* + * While reaching here there may be page faults occurring on + * the to-be-released pages. A race condition may occur if + * unmap_mapping_range() is called before page faults on these + * pages have completed; a valid but stale map is created. + * + * If another command subsequently runs and needs to extend + * dbi_thresh, it may reuse the slot corresponding to the + * previous page in data_bitmap. Though we will allocate a new + * page for the slot in data_area, no page fault will happen + * because we have a valid map. Therefore the command's data + * will be lost. + * + * We lock and unlock pages that are to be released to ensure + * all page faults have completed. This way + * unmap_mapping_range() can ensure stale maps are cleanly + * removed. + */ + lock_page(page); + unlock_page(page); __free_page(page); pages_freed++; } @@ -1822,6 +1843,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi) page = xa_load(&udev->data_pages, dpi); if (likely(page)) { get_page(page); + lock_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1863,6 +1885,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) struct page *page; unsigned long offset; void *addr; + vm_fault_t ret = 0; int mi = tcmu_find_mem_index(vmf->vma); if (mi < 0) @@ -1887,10 +1910,11 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) page = tcmu_try_get_data_page(udev, dpi); if (!page) return VM_FAULT_SIGBUS; + ret = VM_FAULT_LOCKED; } vmf->page = page; - return 0; + return ret; } static const struct vm_operations_struct tcmu_vm_ops = { @@ -3153,12 +3177,22 @@ static void find_free_blocks(void) udev->dbi_max = block; } + /* + * Release the block pages. + * + * Also note that since tcmu_vma_fault() gets an extra page + * refcount, tcmu_blocks_release() won't free pages if pages + * are mapped. This means it is safe to call + * tcmu_blocks_release() before unmap_mapping_range() which + * drops the refcount of any pages it unmaps and thus releases + * them. + */ + pages_freed = tcmu_blocks_release(udev, start, end - 1); + /* Here will truncate the data area from off */ off = udev->data_off + (loff_t)start * udev->data_blk_size; unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); - /* Release the block pages */ - pages_freed = tcmu_blocks_release(udev, start, end - 1); mutex_unlock(&udev->cmdr_lock); total_pages_freed += pages_freed; -- GitLab From 87402434ea841699507263a7c8249c383c6b7baf Mon Sep 17 00:00:00 2001 From: jianghaoran Date: Fri, 29 Apr 2022 13:38:02 +0800 Subject: [PATCH 0438/4335] ipv6: Don't send rs packets to the interface of ARPHRD_TUNNEL [ Upstream commit b52e1cce31ca721e937d517411179f9196ee6135 ] ARPHRD_TUNNEL interface can't process rs packets and will generate TX errors ex: ip tunnel add ethn mode ipip local 192.168.1.1 remote 192.168.1.2 ifconfig ethn x.x.x.x ethn: flags=209 mtu 1480 inet x.x.x.x netmask 255.255.255.255 destination x.x.x.x inet6 fe80::5efe:ac1e:3cdb prefixlen 64 scopeid 0x20 tunnel txqueuelen 1000 (IPIP Tunnel) RX packets 0 bytes 0 (0.0 B) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 0 bytes 0 (0.0 B) TX errors 3 dropped 0 overruns 0 carrier 0 collisions 0 Signed-off-by: jianghaoran Link: https://lore.kernel.org/r/20220429053802.246681-1-jianghaoran@kylinos.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1ba5ff21412cf..07b868c002a33 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4219,7 +4219,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && - (dev->flags&IFF_LOOPBACK) == 0; + (dev->flags & IFF_LOOPBACK) == 0 && + (dev->type != ARPHRD_TUNNEL); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. -- GitLab From 77d7fb14890f4dbea2c863d00d52c9946145d93e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 15 Mar 2022 11:23:40 +0000 Subject: [PATCH 0439/4335] net/mlx5: fs, delete the FTE when there are no rules attached to it [ Upstream commit 7b0c6338597613f465d131bd939a51844a00455a ] When an FTE has no children is means all the rules where removed and the FTE can be deleted regardless of the dests_size value. While dests_size should be 0 when there are no children be extra careful not to leak memory or get firmware syndrome if the proper bookkeeping of dests_size wasn't done. Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 00834c914dc64..a197dd7ca73bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2031,16 +2031,16 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) tree_remove_node(&handle->rule[i]->node, true); - if (fte->dests_size) { - if (fte->modify_mask) - modify_fte(fte); - up_write_ref_node(&fte->node, false); - } else if (list_empty(&fte->node.children)) { + if (list_empty(&fte->node.children)) { del_hw_fte(&fte->node); /* Avoid double call to del_hw_fte */ fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); } else { up_write_ref_node(&fte->node, false); } -- GitLab From 9bb5de6e0b1a82d2c2631a56006848945c425b68 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 28 Apr 2022 17:18:32 +0100 Subject: [PATCH 0440/4335] ASoC: dapm: Don't fold register value changes into notifications [ Upstream commit ad685980469b9f9b99d4d6ea05f4cb8f57cb2234 ] DAPM tracks and reports the value presented to the user from DAPM controls separately to the register value, these may diverge during initialisation or when an autodisable control is in use. When writing DAPM controls we currently report that a change has occurred if either the DAPM value or the value stored in the register has changed, meaning that if the two are out of sync we may appear to report a spurious event to userspace. Since we use this folded in value for nothing other than the value reported to userspace simply drop the folding in of the register change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220428161833.3690050-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 0b166e074457f..47b85ba5b7d6d 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3428,7 +3428,6 @@ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mixer_update_power(card, kcontrol, connect, rconnect); @@ -3530,7 +3529,6 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mux_update_power(card, kcontrol, item[0], e); -- GitLab From 1377b79917e9c59962652dbed0ccaad5ede4d910 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 4 May 2022 09:29:05 +0300 Subject: [PATCH 0441/4335] mlxsw: spectrum_dcb: Do not warn about priority changes [ Upstream commit b6b584562cbe7dc357083459d6dd5b171e12cadb ] The idea behind the warnings is that the user would get warned in case when more than one priority is configured for a given DSCP value on a netdevice. The warning is currently wrong, because dcb_ieee_getapp_mask() returns the first matching entry, not all of them, and the warning will then claim that some priority is "current", when in fact it is not. But more importantly, the warning is misleading in general. Consider the following commands: # dcb app flush dev swp19 dscp-prio # dcb app add dev swp19 dscp-prio 24:3 # dcb app replace dev swp19 dscp-prio 24:2 The last command will issue the following warning: mlxsw_spectrum3 0000:07:00.0 swp19: Ignoring new priority 2 for DSCP 24 in favor of current value of 3 The reason is that the "replace" command works by first adding the new value, and then removing all old values. This is the only way to make the replacement without causing the traffic to be prioritized to whatever the chip defaults to. The warning is issued in response to adding the new priority, and then no warning is shown when the old priority is removed. The upshot is that the canonical way to change traffic prioritization always produces a warning about ignoring the new priority, but what gets configured is in fact what the user intended. An option to just emit warning every time that the prioritization changes just to make it clear that it happened is obviously unsatisfactory. Therefore, in this patch, remove the warnings. Reported-by: Maksym Yaremchuk Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 5f92b16913605..aff6d4f35cd2f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -168,8 +168,6 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, struct dcb_app *app) { - int prio; - if (app->priority >= IEEE_8021QAZ_MAX_TCS) { netdev_err(dev, "APP entry with priority value %u is invalid\n", app->priority); @@ -183,17 +181,6 @@ static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, app->protocol); return -EINVAL; } - - /* Warn about any DSCP APP entries with the same PID. */ - prio = fls(dcb_ieee_getapp_mask(dev, app)); - if (prio--) { - if (prio < app->priority) - netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n", - app->priority, app->protocol, prio); - else if (prio > app->priority) - netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n", - app->priority, app->protocol, prio); - } break; case IEEE_8021QAZ_APP_SEL_ETHERTYPE: -- GitLab From da3b69a564547b9262daf1c49c0302721d7a4500 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 4 May 2022 09:29:06 +0300 Subject: [PATCH 0442/4335] mlxsw: Treat LLDP packets as control [ Upstream commit 0106668cd2f91bf913fb78972840dedfba80a3c3 ] When trapping packets for on-CPU processing, Spectrum machines differentiate between control and non-control traps. Traffic trapped through non-control traps is treated as data and kept in shared buffer in pools 0-4. Traffic trapped through control traps is kept in the dedicated control buffer 9. The advantage of marking traps as control is that pressure in the data plane does not prevent the control traffic to be processed. When the LLDP trap was introduced, it was marked as a control trap. But then in commit aed4b5721143 ("mlxsw: spectrum: PTP: Hook into packet receive path"), PTP traps were introduced. Because Ethernet-encapsulated PTP packets look to the Spectrum-1 ASIC as LLDP traffic and are trapped under the LLDP trap, this trap was reconfigured as non-control, in sync with the PTP traps. There is however no requirement that PTP traffic be handled as data. Besides, the usual encapsulation for PTP traffic is UDP, not bare Ethernet, and that is in deployments that even need PTP, which is far less common than LLDP. This is reflected by the default policer, which was not bumped up to the 19Kpps / 24Kpps that is the expected load of a PTP-enabled Spectrum-1 switch. Marking of LLDP trap as non-control was therefore probably misguided. In this patch, change it back to control. Reported-by: Maksym Yaremchuk Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 26d01adbedad3..ce6f6590a7777 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -864,7 +864,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { .trap = MLXSW_SP_TRAP_CONTROL(LLDP, LLDP, TRAP), .listeners_arr = { MLXSW_RXL(mlxsw_sp_rx_ptp_listener, LLDP, TRAP_TO_CPU, - false, SP_LLDP, DISCARD), + true, SP_LLDP, DISCARD), }, }, { -- GitLab From a2c87348acebd9bacedfe20cf82341928751ff91 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 21 Apr 2022 01:21:52 -0400 Subject: [PATCH 0443/4335] drm/amdgpu/psp: move PSP memory alloc from hw_init to sw_init [ Upstream commit b95b5391684b39695887afb4a13cccee7820f5d6 ] Memory allocations should be done in sw_init. hw_init should just be hardware programming needed to initialize the IP block. This is how most other IP blocks work. Move the GPU memory allocations from psp hw_init to psp sw_init and move the memory free to sw_fini. This also fixes a potential GPU memory leak if psp hw_init fails. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 95 ++++++++++++------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 86e2090bbd6e0..57e9932d8a04e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -314,7 +314,39 @@ static int psp_sw_init(void *handle) } } + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + amdgpu_sriov_vf(adev) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed1; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + if (ret) + goto failed2; + return 0; + +failed2: + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); +failed1: + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + return ret; } static int psp_sw_fini(void *handle) @@ -344,6 +376,13 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + return 0; } @@ -2580,51 +2619,18 @@ static int psp_load_fw(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { - psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ - goto skip_memalloc; - } - - if (amdgpu_sriov_vf(adev)) { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); + /* should not destroy ring, only stop */ + psp_ring_stop(psp, PSP_RING_TYPE__KM); } else { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - } - - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fence_buf_bo, - &psp->fence_buf_mc_addr, - &psp->fence_buf); - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - if (ret) - goto failed; + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - - ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) { - DRM_ERROR("PSP ring init failed!\n"); - goto failed; + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) { + DRM_ERROR("PSP ring init failed!\n"); + goto failed; + } } -skip_memalloc: ret = psp_hw_start(psp); if (ret) goto failed; @@ -2730,13 +2736,6 @@ static int psp_hw_fini(void *handle) psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - return 0; } -- GitLab From b305469ed00420e53c66064b554531d6fbd98d5b Mon Sep 17 00:00:00 2001 From: Alice Wong Date: Mon, 2 May 2022 11:40:18 -0400 Subject: [PATCH 0444/4335] drm/amdgpu/ucode: Remove firmware load type check in amdgpu_ucode_free_bo [ Upstream commit ab0cd4a9ae5b4679b714d8dbfedc0901fecdce9f ] When psp_hw_init failed, it will set the load_type to AMDGPU_FW_LOAD_DIRECT. During amdgpu_device_ip_fini, amdgpu_ucode_free_bo checks that load_type is AMDGPU_FW_LOAD_DIRECT and skips deallocating fw_buf causing memory leak. Remove load_type check in amdgpu_ucode_free_bo. Signed-off-by: Alice Wong Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index abd8469380e51..0ed0736d515aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -723,8 +723,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) void amdgpu_ucode_free_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); } -- GitLab From 4262a0e46b08b274511ed7133363c8d8ad189082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Fri, 29 Apr 2022 16:13:24 -0400 Subject: [PATCH 0445/4335] regulator: mt6315: Enforce regulator-compatible, not name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d435a94ba5bb4f2ad381c0828fbae89c66b50fe ] The MT6315 PMIC dt-binding should enforce that one of the valid regulator-compatible is set in each regulator node. However it was mistakenly matching against regulator-name instead. Fix the typo. This not only fixes the compatible verification, but also lifts the regulator-name restriction, so that more meaningful names can be set for each platform. Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220429201325.2205799-1-nfraprado@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- .../devicetree/bindings/regulator/mt6315-regulator.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml index 61dd5af80db67..5d2d989de893c 100644 --- a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml @@ -31,7 +31,7 @@ properties: $ref: "regulator.yaml#" properties: - regulator-name: + regulator-compatible: pattern: "^vbuck[1-4]$" additionalProperties: false -- GitLab From 22e0b0b84c538b60bdf8eeceee7ab3cebf4a1a09 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Fri, 6 May 2022 15:24:25 +0800 Subject: [PATCH 0446/4335] HID: bigben: fix slab-out-of-bounds Write in bigben_probe [ Upstream commit fc4ef9d5724973193bfa5ebed181dba6de3a56db ] There is a slab-out-of-bounds Write bug in hid-bigbenff driver. The problem is the driver assumes the device must have an input but some malicious devices violate this assumption. Fix this by checking hid_device's input is non-empty before its usage. Reported-by: syzkaller Signed-off-by: Dongliang Mu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-bigbenff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 74ad8bf98bfd5..e8c5e3ac9fff1 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -347,6 +347,12 @@ static int bigben_probe(struct hid_device *hid, bigben->report = list_entry(report_list->next, struct hid_report, list); + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + error = -ENODEV; + goto error_hw_stop; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); set_bit(FF_RUMBLE, hidinput->input->ffbit); -- GitLab From db6d83e09f760531bb57a31fb1e80f0d5b6a68e2 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 6 May 2022 19:44:01 +0800 Subject: [PATCH 0447/4335] of: Support more than one crash kernel regions for kexec -s [ Upstream commit 8af6b91f58341325bf74ecb0389ddc0039091d84 ] When "crashkernel=X,high" is used, there may be two crash regions: high=crashk_res and low=crashk_low_res. But now the syscall kexec_file_load() only add crashk_res into "linux,usable-memory-range", this may cause the second kernel to have no available dma memory. Fix it like kexec-tools does for option -c, add both 'high' and 'low' regions into the dtb. Signed-off-by: Zhen Lei Acked-by: Rob Herring Acked-by: Baoquan He Link: https://lore.kernel.org/r/20220506114402.365-6-thunder.leizhen@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- drivers/of/kexec.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 761fd870d1db2..72c790a3c910c 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -386,6 +386,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, crashk_res.end - crashk_res.start + 1); if (ret) goto out; + + if (crashk_low_res.end) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,usable-memory-range", + crashk_low_res.start, + crashk_low_res.end - crashk_low_res.start + 1); + if (ret) + goto out; + } } /* add bootargs */ -- GitLab From 2200453e8910b3d5f0d6f10935008eae53dcbe8b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 4 May 2022 18:08:52 +0100 Subject: [PATCH 0448/4335] ASoC: tscs454: Add endianness flag in snd_soc_component_driver [ Upstream commit ff69ec96b87dccb3a29edef8cec5d4fefbbc2055 ] The endianness flag is used on the CODEC side to specify an ambivalence to endian, typically because it is lost over the hardware link. This device receives audio over an I2S DAI and as such should have endianness applied. A fixup is also required to use the width directly rather than relying on the format in hw_params, now both little and big endian would be supported. It is worth noting this changes the behaviour of S24_LE to use a word length of 24 rather than 32. This would appear to be a correction since the fact S24_LE is stored as 32 bits should not be presented over the bus. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220504170905.332415-26-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tscs454.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/tscs454.c b/sound/soc/codecs/tscs454.c index 43220bb36701a..c27ca9a273e14 100644 --- a/sound/soc/codecs/tscs454.c +++ b/sound/soc/codecs/tscs454.c @@ -3120,18 +3120,17 @@ static int set_aif_sample_format(struct snd_soc_component *component, unsigned int width; int ret; - switch (format) { - case SNDRV_PCM_FORMAT_S16_LE: + switch (snd_pcm_format_width(format)) { + case 16: width = FV_WL_16; break; - case SNDRV_PCM_FORMAT_S20_3LE: + case 20: width = FV_WL_20; break; - case SNDRV_PCM_FORMAT_S24_3LE: + case 24: width = FV_WL_24; break; - case SNDRV_PCM_FORMAT_S24_LE: - case SNDRV_PCM_FORMAT_S32_LE: + case 32: width = FV_WL_32; break; default: @@ -3326,6 +3325,7 @@ static const struct snd_soc_component_driver soc_component_dev_tscs454 = { .num_dapm_routes = ARRAY_SIZE(tscs454_intercon), .controls = tscs454_snd_controls, .num_controls = ARRAY_SIZE(tscs454_snd_controls), + .endianness = 1, }; #define TSCS454_RATES SNDRV_PCM_RATE_8000_96000 -- GitLab From db6da340d66b8392dcecca0daa45051a8c1e4963 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 5 May 2022 20:55:17 -0700 Subject: [PATCH 0449/4335] scsi: lpfc: Alter FPIN stat accounting logic [ Upstream commit e6f51041450282a8668af3a8fc5c7744e81a447c ] When configuring CMF management based on signals instead of FPINs, FPIN alarm and warning statistics are not tracked. Change the behavior so that FPIN alarms and warnings are always tracked regardless of the configured mode. Similar changes are made in the CMF signal stat accounting logic. Upon receipt of a signal, only track signaled alarms and warnings. FPIN stats should not be incremented upon receipt of a signal. Link: https://lore.kernel.org/r/20220506035519.50908-11-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 49 +++++++++++------------------------ drivers/scsi/lpfc/lpfc_init.c | 22 ++-------------- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 886006ad12a29..ce28c4a30460f 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3777,9 +3777,6 @@ lpfc_least_capable_settings(struct lpfc_hba *phba, { u32 rsp_sig_cap = 0, drv_sig_cap = 0; u32 rsp_sig_freq_cyc = 0, rsp_sig_freq_scale = 0; - struct lpfc_cgn_info *cp; - u32 crc; - u16 sig_freq; /* Get rsp signal and frequency capabilities. */ rsp_sig_cap = be32_to_cpu(pcgd->xmt_signal_capability); @@ -3835,25 +3832,7 @@ lpfc_least_capable_settings(struct lpfc_hba *phba, } } - if (!phba->cgn_i) - return; - - /* Update signal frequency in congestion info buffer */ - cp = (struct lpfc_cgn_info *)phba->cgn_i->virt; - - /* Frequency (in ms) Signal Warning/Signal Congestion Notifications - * are received by the HBA - */ - sig_freq = phba->cgn_sig_freq; - - if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY) - cp->cgn_warn_freq = cpu_to_le16(sig_freq); - if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) { - cp->cgn_alarm_freq = cpu_to_le16(sig_freq); - cp->cgn_warn_freq = cpu_to_le16(sig_freq); - } - crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED); - cp->cgn_info_crc = cpu_to_le32(crc); + /* We are NOT recording signal frequency in congestion info buffer */ return; out_no_support: @@ -9584,11 +9563,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) /* Take action here for an Alarm event */ if (phba->cmf_active_mode != LPFC_CFG_OFF) { if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) { - /* Track of alarm cnt for cgn_info */ - atomic_inc(&phba->cgn_fabric_alarm_cnt); /* Track of alarm cnt for SYNC_WQE */ atomic_inc(&phba->cgn_sync_alarm_cnt); } + /* Track alarm cnt for cgn_info regardless + * of whether CMF is configured for Signals + * or FPINs. + */ + atomic_inc(&phba->cgn_fabric_alarm_cnt); goto cleanup; } break; @@ -9596,11 +9578,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) /* Take action here for a Warning event */ if (phba->cmf_active_mode != LPFC_CFG_OFF) { if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) { - /* Track of warning cnt for cgn_info */ - atomic_inc(&phba->cgn_fabric_warn_cnt); /* Track of warning cnt for SYNC_WQE */ atomic_inc(&phba->cgn_sync_warn_cnt); } + /* Track warning cnt and freq for cgn_info + * regardless of whether CMF is configured for + * Signals or FPINs. + */ + atomic_inc(&phba->cgn_fabric_warn_cnt); cleanup: /* Save frequency in ms */ phba->cgn_fpin_frequency = @@ -9609,14 +9594,10 @@ cleanup: if (phba->cgn_i) { cp = (struct lpfc_cgn_info *) phba->cgn_i->virt; - if (phba->cgn_reg_fpin & - LPFC_CGN_FPIN_ALARM) - cp->cgn_alarm_freq = - cpu_to_le16(value); - if (phba->cgn_reg_fpin & - LPFC_CGN_FPIN_WARN) - cp->cgn_warn_freq = - cpu_to_le16(value); + cp->cgn_alarm_freq = + cpu_to_le16(value); + cp->cgn_warn_freq = + cpu_to_le16(value); crc = lpfc_cgn_calc_crc32 (cp, LPFC_CGN_INFO_SZ, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index ce103c1f80629..a2694fb32b5d9 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5795,21 +5795,8 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) /* Use the frequency found in the last rcv'ed FPIN */ value = phba->cgn_fpin_frequency; - if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) - cp->cgn_warn_freq = cpu_to_le16(value); - if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) - cp->cgn_alarm_freq = cpu_to_le16(value); - - /* Frequency (in ms) Signal Warning/Signal Congestion Notifications - * are received by the HBA - */ - value = phba->cgn_sig_freq; - - if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY || - phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) - cp->cgn_warn_freq = cpu_to_le16(value); - if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) - cp->cgn_alarm_freq = cpu_to_le16(value); + cp->cgn_warn_freq = cpu_to_le16(value); + cp->cgn_alarm_freq = cpu_to_le16(value); lvalue = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED); @@ -6493,9 +6480,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) /* Alarm overrides warning, so check that first */ if (cgn_signal->alarm_cnt) { if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) { - /* Keep track of alarm cnt for cgn_info */ - atomic_add(cgn_signal->alarm_cnt, - &phba->cgn_fabric_alarm_cnt); /* Keep track of alarm cnt for CMF_SYNC_WQE */ atomic_add(cgn_signal->alarm_cnt, &phba->cgn_sync_alarm_cnt); @@ -6504,8 +6488,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) /* signal action needs to be taken */ if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY || phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) { - /* Keep track of warning cnt for cgn_info */ - atomic_add(cnt, &phba->cgn_fabric_warn_cnt); /* Keep track of warning cnt for CMF_SYNC_WQE */ atomic_add(cnt, &phba->cgn_sync_warn_cnt); } -- GitLab From 6320ae1b5876c30bf98203b6a5abe8b5c45e6a04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 May 2022 20:57:40 -0700 Subject: [PATCH 0450/4335] net: remove two BUG() from skb_checksum_help() [ Upstream commit d7ea0d9df2a6265b2b180d17ebc64b38105968fc ] I have a syzbot report that managed to get a crash in skb_checksum_help() If syzbot can trigger these BUG(), it makes sense to replace them with more friendly WARN_ON_ONCE() since skb_checksum_help() can instead return an error code. Note that syzbot will still crash there, until real bug is fixed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5907212c00f37..b9731b267d073 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3233,11 +3233,15 @@ int skb_checksum_help(struct sk_buff *skb) } offset = skb_checksum_start_offset(skb); - BUG_ON(offset >= skb_headlen(skb)); + ret = -EINVAL; + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) + goto out; + csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); + if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) + goto out; ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); if (ret) -- GitLab From 69b296597656dff40d6337af1beb0322f21f841d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 6 May 2022 11:33:19 +0200 Subject: [PATCH 0451/4335] s390/preempt: disable __preempt_count_add() optimization for PROFILE_ALL_BRANCHES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 63678eecec57fc51b778be3da35a397931287170 ] gcc 12 does not (always) optimize away code that should only be generated if parameters are constant and within in a certain range. This depends on various obscure kernel config options, however in particular PROFILE_ALL_BRANCHES can trigger this compile error: In function ‘__atomic_add_const’, inlined from ‘__preempt_count_add.part.0’ at ./arch/s390/include/asm/preempt.h:50:3: ./arch/s390/include/asm/atomic_ops.h:80:9: error: impossible constraint in ‘asm’ 80 | asm volatile( \ | ^~~ Workaround this by simply disabling the optimization for PROFILE_ALL_BRANCHES, since the kernel will be so slow, that this optimization won't matter at all. Reported-by: Thomas Richter Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/include/asm/preempt.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index d9d5350cc3ec3..bf15da0fedbca 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void) static inline void __preempt_count_add(int val) { - if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) - __atomic_add_const(val, &S390_lowcore.preempt_count); - else - __atomic_add(val, &S390_lowcore.preempt_count); + /* + * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES + * enabled, gcc 12 fails to handle __builtin_constant_p(). + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { + if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { + __atomic_add_const(val, &S390_lowcore.preempt_count); + return; + } + } + __atomic_add(val, &S390_lowcore.preempt_count); } static inline void __preempt_count_sub(int val) -- GitLab From cab410da48c257980f9abb9deac757c201f9cba5 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 9 May 2022 10:19:07 +0530 Subject: [PATCH 0452/4335] perf/amd/ibs: Cascade pmu init functions' return value [ Upstream commit 39b2ca75eec8a33e2ffdb8aa0c4840ec3e3b472c ] IBS pmu initialization code ignores return value provided by callee functions. Fix it. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220509044914.1473-2-ravi.bangoria@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/ibs.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 9739019d4b67a..367ca899e6e88 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -759,9 +759,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init void perf_event_ibs_init(void) +static __init int perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + int ret; /* * Some chips fail to reset the fetch count when it is written; instead @@ -773,7 +774,9 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + if (ret) + return ret; if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; @@ -786,15 +789,35 @@ static __init void perf_event_ibs_init(void) perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; } - perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + if (ret) + goto err_op; + + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ret) + goto err_nmi; - register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); + return 0; + +err_nmi: + perf_pmu_unregister(&perf_ibs_op.pmu); + free_percpu(perf_ibs_op.pcpu); + perf_ibs_op.pcpu = NULL; +err_op: + perf_pmu_unregister(&perf_ibs_fetch.pmu); + free_percpu(perf_ibs_fetch.pcpu); + perf_ibs_fetch.pcpu = NULL; + + return ret; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init void perf_event_ibs_init(void) { } +static __init int perf_event_ibs_init(void) +{ + return 0; +} #endif @@ -1064,9 +1087,7 @@ static __init int amd_ibs_init(void) x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); - perf_event_ibs_init(); - - return 0; + return perf_event_ibs_init(); } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ -- GitLab From aeca695a19f8a7a43633b6bb86c82e1dddf2ee02 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 30 Apr 2022 16:58:42 +0800 Subject: [PATCH 0453/4335] sched/core: Avoid obvious double update_rq_clock warning [ Upstream commit 2679a83731d51a744657f718fc02c3b077e47562 ] When we use raw_spin_rq_lock() to acquire the rq lock and have to update the rq clock while holding the lock, the kernel may issue a WARN_DOUBLE_CLOCK warning. Since we directly use raw_spin_rq_lock() to acquire rq lock instead of rq_lock(), there is no corresponding change to rq->clock_update_flags. In particular, we have obtained the rq lock of other CPUs, the rq->clock_update_flags of this CPU may be RQCF_UPDATED at this time, and then calling update_rq_clock() will trigger the WARN_DOUBLE_CLOCK warning. So we need to clear RQCF_UPDATED of rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning. For the sched_rt_period_timer() and migrate_task_rq_dl() cases we simply replace raw_spin_rq_lock()/raw_spin_rq_unlock() with rq_lock()/rq_unlock(). For the {pull,push}_{rt,dl}_task() cases, we add the double_rq_clock_clear_update() function to clear RQCF_UPDATED of rq->clock_update_flags, and call double_rq_clock_clear_update() before double_lock_balance()/double_rq_lock() returns to avoid the WARN_DOUBLE_CLOCK warning. Some call trace reports: Call Trace 1: sched_rt_period_timer+0x10f/0x3a0 ? enqueue_top_rt_rq+0x110/0x110 __hrtimer_run_queues+0x1a9/0x490 hrtimer_interrupt+0x10b/0x240 __sysvec_apic_timer_interrupt+0x8a/0x250 sysvec_apic_timer_interrupt+0x9a/0xd0 asm_sysvec_apic_timer_interrupt+0x12/0x20 Call Trace 2: activate_task+0x8b/0x110 push_rt_task.part.108+0x241/0x2c0 push_rt_tasks+0x15/0x30 finish_task_switch+0xaa/0x2e0 ? __switch_to+0x134/0x420 __schedule+0x343/0x8e0 ? hrtimer_start_range_ns+0x101/0x340 schedule+0x4e/0xb0 do_nanosleep+0x8e/0x160 hrtimer_nanosleep+0x89/0x120 ? hrtimer_init_sleeper+0x90/0x90 __x64_sys_nanosleep+0x96/0xd0 do_syscall_64+0x34/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Call Trace 3: deactivate_task+0x93/0xe0 pull_rt_task+0x33e/0x400 balance_rt+0x7e/0x90 __schedule+0x62f/0x8e0 do_task_dead+0x3f/0x50 do_exit+0x7b8/0xbb0 do_group_exit+0x2d/0x90 get_signal+0x9df/0x9e0 ? preempt_count_add+0x56/0xa0 ? __remove_hrtimer+0x35/0x70 arch_do_signal_or_restart+0x36/0x720 ? nanosleep_copyout+0x39/0x50 ? do_nanosleep+0x131/0x160 ? audit_filter_inodes+0xf5/0x120 exit_to_user_mode_prepare+0x10f/0x1e0 syscall_exit_to_user_mode+0x17/0x30 do_syscall_64+0x40/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Call Trace 4: update_rq_clock+0x128/0x1a0 migrate_task_rq_dl+0xec/0x310 set_task_cpu+0x84/0x1e4 try_to_wake_up+0x1d8/0x5c0 wake_up_process+0x1c/0x30 hrtimer_wakeup+0x24/0x3c __hrtimer_run_queues+0x114/0x270 hrtimer_interrupt+0xe8/0x244 arch_timer_handler_phys+0x30/0x50 handle_percpu_devid_irq+0x88/0x140 generic_handle_domain_irq+0x40/0x60 gic_handle_irq+0x48/0xe0 call_on_irq_stack+0x2c/0x60 do_interrupt_handler+0x80/0x84 Steps to reproduce: 1. Enable CONFIG_SCHED_DEBUG when compiling the kernel 2. echo 1 > /sys/kernel/debug/clear_warn_once echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features echo "NO_RT_PUSH_IPI" > /sys/kernel/debug/sched/features 3. Run some rt/dl tasks that periodically work and sleep, e.g. Create 2*n rt or dl (90% running) tasks via rt-app (on a system with n CPUs), and Dietmar Eggemann reports Call Trace 4 when running on PREEMPT_RT kernel. Signed-off-by: Hao Jia Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Link: https://lore.kernel.org/r/20220430085843.62939-2-jiahao.os@bytedance.com Signed-off-by: Sasha Levin --- kernel/sched/core.c | 6 +++--- kernel/sched/deadline.c | 5 +++-- kernel/sched/rt.c | 5 +++-- kernel/sched/sched.h | 28 ++++++++++++++++++++++++---- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 779f3198b17de..838623b680311 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -531,10 +531,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2) swap(rq1, rq2); raw_spin_rq_lock(rq1); - if (__rq_lockp(rq1) == __rq_lockp(rq2)) - return; + if (__rq_lockp(rq1) != __rq_lockp(rq2)) + raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); - raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); + double_rq_clock_clear_update(rq1, rq2); } #endif diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1f811b375bf0a..fffcb1aa77b7d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1720,6 +1720,7 @@ out: static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) { + struct rq_flags rf; struct rq *rq; if (READ_ONCE(p->__state) != TASK_WAKING) @@ -1731,7 +1732,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused * from try_to_wake_up(). Hence, p->pi_lock is locked, but * rq->lock is not... So, lock it */ - raw_spin_rq_lock(rq); + rq_lock(rq, &rf); if (p->dl.dl_non_contending) { update_rq_clock(rq); sub_running_bw(&p->dl, &rq->dl); @@ -1747,7 +1748,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused put_task_struct(p); } sub_rq_bw(&p->dl, &rq->dl); - raw_spin_rq_unlock(rq); + rq_unlock(rq, &rf); } static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2758cf5f7987f..8007d087a57f9 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -885,6 +885,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); + struct rq_flags rf; int skip; /* @@ -899,7 +900,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (skip) continue; - raw_spin_rq_lock(rq); + rq_lock(rq, &rf); update_rq_clock(rq); if (rt_rq->rt_time) { @@ -937,7 +938,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (enqueue) sched_rt_rq_enqueue(rt_rq); - raw_spin_rq_unlock(rq); + rq_unlock(rq, &rf); } if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4f432826933da..d30dc55e6cee6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2489,6 +2489,24 @@ unsigned long arch_scale_freq_capacity(int cpu) } #endif +#ifdef CONFIG_SCHED_DEBUG +/* + * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to + * acquire rq lock instead of rq_lock(). So at the end of these two functions + * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of + * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning. + */ +static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) +{ + rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); + /* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */ +#ifdef CONFIG_SMP + rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); +#endif +} +#else +static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {} +#endif #ifdef CONFIG_SMP @@ -2554,14 +2572,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { - if (__rq_lockp(this_rq) == __rq_lockp(busiest)) - return 0; - - if (likely(raw_spin_rq_trylock(busiest))) + if (__rq_lockp(this_rq) == __rq_lockp(busiest) || + likely(raw_spin_rq_trylock(busiest))) { + double_rq_clock_clear_update(this_rq, busiest); return 0; + } if (rq_order_less(this_rq, busiest)) { raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING); + double_rq_clock_clear_update(this_rq, busiest); return 0; } @@ -2655,6 +2674,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) BUG_ON(rq1 != rq2); raw_spin_rq_lock(rq1); __acquire(rq2->lock); /* Fake it out ;) */ + double_rq_clock_clear_update(rq1, rq2); } /* -- GitLab From bab45966325003e92b8ebbc3a9cada1c7141df93 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 11 May 2022 09:46:42 +0200 Subject: [PATCH 0454/4335] spi: stm32-qspi: Fix wait_cmd timeout in APM mode [ Upstream commit d83d89ea68b4726700fa87b22db075e4217e691c ] In APM mode, TCF and TEF flags are not set. To avoid timeout in stm32_qspi_wait_cmd(), don't check if TCF/TEF are set. Signed-off-by: Patrice Chotard Reported-by: eberhard.stoll@kontron.de Link: https://lore.kernel.org/r/20220511074644.558874-2-patrice.chotard@foss.st.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-stm32-qspi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index ffdc55f87e821..dd38cb8ffbc20 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -308,7 +308,8 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi, if (!op->data.nbytes) goto wait_nobusy; - if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) + if ((readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) || + qspi->fmode == CCR_FMODE_APM) goto out; reinit_completion(&qspi->data_completion); -- GitLab From e202dad51908c942b307374cfc967800993afc78 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 10 May 2022 13:17:32 -0400 Subject: [PATCH 0455/4335] dma-debug: change allocation mode from GFP_NOWAIT to GFP_ATIOMIC [ Upstream commit 84bc4f1dbbbb5f8aa68706a96711dccb28b518e5 ] We observed the error "cacheline tracking ENOMEM, dma-debug disabled" during a light system load (copying some files). The reason for this error is that the dma_active_cacheline radix tree uses GFP_NOWAIT allocation - so it can't access the emergency memory reserves and it fails as soon as anybody reaches the watermark. This patch changes GFP_NOWAIT to GFP_ATOMIC, so that it can access the emergency memory reserves. Signed-off-by: Mikulas Patocka Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index f8ff598596b85..ac740630c79c2 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -448,7 +448,7 @@ void debug_dma_dump_mappings(struct device *dev) * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -- GitLab From 717c6f4570a6715bc19e8975cad1a90a1e46f4a5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 May 2022 08:11:36 -0500 Subject: [PATCH 0456/4335] ACPI: PM: Block ASUS B1400CEAE from suspend to idle by default [ Upstream commit d52848620de00cde4a3a5df908e231b8c8868250 ] ASUS B1400CEAE fails to resume from suspend to idle by default. This was bisected back to commit df4f9bc4fb9c ("nvme-pci: add support for ACPI StorageD3Enable property") but this is a red herring to the problem. Before this commit the system wasn't getting into deepest sleep state. Presumably this commit is allowing entry into deepest sleep state as advertised by firmware, but there are some other problems related to the wakeup. As it is confirmed the system works properly with S3, set the default for this system to S3. Reported-by: Jian-Hong Pan Link: https://bugzilla.kernel.org/show_bug.cgi?id=215742 Signed-off-by: Mario Limonciello Tested-by: Jian-Hong Pan Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/sleep.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 7ae09e4b45927..07515139141e5 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -374,6 +374,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, + /* + * ASUS B1400CEAE hangs on resume from suspend (see + * https://bugzilla.kernel.org/show_bug.cgi?id=215742). + */ + { + .callback = init_default_s3, + .ident = "ASUS B1400CEAE", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), + }, + }, {}, }; -- GitLab From 98a0c0dea51d61ab880c6191bd354f65287b3d58 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 1 Apr 2022 07:44:53 -0500 Subject: [PATCH 0457/4335] ipmi:ssif: Check for NULL msg when handling events and messages [ Upstream commit 7602b957e2404e5f98d9a40b68f1fd27f0028712 ] Even though it's not possible to get into the SSIF_GETTING_MESSAGES and SSIF_GETTING_EVENTS states without a valid message in the msg field, it's probably best to be defensive here and check and print a log, since that means something else went wrong. Also add a default clause to that switch statement to release the lock and print a log, in case the state variable gets messed up somehow. Reported-by: Haowen Bai Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 8d7a8898e80b0..f366e8e3eee3d 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -814,6 +814,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_EVENTS: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting events\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -838,6 +846,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_MESSAGES: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting messages\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -861,6 +877,13 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, deliver_recv_msg(ssif_info, msg); } break; + + default: + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "Invalid state in message done handling: %d\n", + ssif_info->ssif_state); + ipmi_ssif_unlock_cond(ssif_info, flags); } flags = ipmi_ssif_lock_cond(ssif_info, &oflags); -- GitLab From 25929a5a69cf3e58e3e5205e3b95f10acdac8d9f Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 15 Apr 2022 07:23:32 -0500 Subject: [PATCH 0458/4335] ipmi: Fix pr_fmt to avoid compilation issues [ Upstream commit 2ebaf18a0b7fb764bba6c806af99fe868cee93de ] The was it was wouldn't work in some situations, simplify it. What was there was unnecessary complexity. Reported-by: kernel test robot Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_msghandler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index fe91090e04a46..2badf36d4816c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -11,8 +11,8 @@ * Copyright 2002 MontaVista Software Inc. */ -#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: " -#define dev_fmt pr_fmt +#define pr_fmt(fmt) "IPMI message handler: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include -- GitLab From 03755410a8f668072502c0fdc58760e15ee6358a Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 11 May 2022 09:44:52 +0800 Subject: [PATCH 0459/4335] rtlwifi: Use pr_warn instead of WARN_ONCE [ Upstream commit ad732da434a2936128769216eddaece3b1af4588 ] This memory allocation failure can be triggered by fault injection or high pressure testing, resulting a WARN. Fix this by replacing WARN with pr_warn. Reported-by: syzkaller Signed-off-by: Dongliang Mu Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220511014453.1621366-1-dzm91@hust.edu.cn Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 86a2368732547..a8eebafb9a7ee 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1014,7 +1014,7 @@ int rtl_usb_probe(struct usb_interface *intf, hw = ieee80211_alloc_hw(sizeof(struct rtl_priv) + sizeof(struct rtl_usb_priv), &rtl_ops); if (!hw) { - WARN_ONCE(true, "rtl_usb: ieee80211 alloc failed\n"); + pr_warn("rtl_usb: ieee80211 alloc failed\n"); return -ENOMEM; } rtlpriv = hw->priv; -- GitLab From bf5572fb3fc42a72c4f9f71e67a11a5fd5ac00f8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Mar 2022 21:15:15 +0100 Subject: [PATCH 0460/4335] mt76: mt7921: accept rx frames with non-standard VHT MCS10-11 [ Upstream commit 3128ea016965ce9f91ddf4e1dd944724462d1698 ] The hardware receives them properly, they should not be dropped Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index c093920a597d1..5024ddf07cbca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -563,7 +563,7 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb) status->nss = FIELD_GET(MT_PRXV_NSTS, v0) + 1; status->encoding = RX_ENC_VHT; - if (i > 9) + if (i > 11) return -EINVAL; break; case MT_PHY_TYPE_HE_MU: -- GitLab From 4b6807b1cfb8677d8805a3fa9ee51d7d52d9eceb Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Apr 2022 14:29:00 +0200 Subject: [PATCH 0461/4335] mt76: fix encap offload ethernet type check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bc98e7fdd80d215b4b55eea001023231eb8ce12e ] The driver needs to check if the format is 802.2 vs 802.3 in order to set a tx descriptor flag. skb->protocol can't be used, since it may not be properly initialized for packets coming in from a packet socket. Fix misdetection by checking the ethertype from the skb data instead Reported-by: Thibaut VARÈNE Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 4 +++- drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 7691292526e05..a8a0e6af51f85 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -799,6 +799,7 @@ mt7915_mac_write_txwi_8023(struct mt7915_dev *dev, __le32 *txwi, u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; u8 fc_type, fc_stype; + u16 ethertype; bool wmm = false; u32 val; @@ -812,7 +813,8 @@ mt7915_mac_write_txwi_8023(struct mt7915_dev *dev, __le32 *txwi, val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) | FIELD_PREP(MT_TXD1_TID, tid); - if (be16_to_cpu(skb->protocol) >= ETH_P_802_3_MIN) + ethertype = get_unaligned_be16(&skb->data[12]); + if (ethertype >= ETH_P_802_3_MIN) val |= MT_TXD1_ETH_802_3; txwi[1] |= cpu_to_le32(val); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index 5024ddf07cbca..bef8d4a76ed90 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -681,6 +681,7 @@ mt7921_mac_write_txwi_8023(struct mt7921_dev *dev, __le32 *txwi, { u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; u8 fc_type, fc_stype; + u16 ethertype; bool wmm = false; u32 val; @@ -694,7 +695,8 @@ mt7921_mac_write_txwi_8023(struct mt7921_dev *dev, __le32 *txwi, val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) | FIELD_PREP(MT_TXD1_TID, tid); - if (be16_to_cpu(skb->protocol) >= ETH_P_802_3_MIN) + ethertype = get_unaligned_be16(&skb->data[12]); + if (ethertype >= ETH_P_802_3_MIN) val |= MT_TXD1_ETH_802_3; txwi[1] |= cpu_to_le32(val); -- GitLab From eeb4819e94aa69767b9e5591e70c63e8b7c5786a Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 24 Mar 2022 09:37:24 +0100 Subject: [PATCH 0462/4335] media: rga: fix possible memory leak in rga_probe [ Upstream commit a71eb6025305192e646040cd76ccacb5bd48a1b5 ] rga->m2m_dev needs to be freed when rga_probe fails. Signed-off-by: Hangyu Hua Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/rockchip/rga/rga.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index d99ea8973b678..e3246344fb724 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -868,7 +868,7 @@ static int rga_probe(struct platform_device *pdev) ret = pm_runtime_resume_and_get(rga->dev); if (ret < 0) - goto rel_vdev; + goto rel_m2m; rga->version.major = (rga_read(rga, RGA_VERSION_INFO) >> 24) & 0xFF; rga->version.minor = (rga_read(rga, RGA_VERSION_INFO) >> 20) & 0x0F; @@ -884,7 +884,7 @@ static int rga_probe(struct platform_device *pdev) DMA_ATTR_WRITE_COMBINE); if (!rga->cmdbuf_virt) { ret = -ENOMEM; - goto rel_vdev; + goto rel_m2m; } rga->src_mmu_pages = @@ -921,6 +921,8 @@ free_src_pages: free_dma: dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt, rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE); +rel_m2m: + v4l2_m2m_release(rga->m2m_dev); rel_vdev: video_device_release(vfd); unreg_v4l2_dev: -- GitLab From a1f2cb0a26133f2c013a42aa8d1b1b2ce691631d Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 26 Apr 2022 11:15:55 +0200 Subject: [PATCH 0463/4335] media: coda: limit frame interval enumeration to supported encoder frame sizes [ Upstream commit 67e33dd957880879e785cfea83a3aa24bd5c5577 ] Let VIDIOC_ENUM_FRAMEINTERVALS return -EINVAL if userspace queries frame intervals for frame sizes unsupported by the encoder. Fixes the following v4l2-compliance failure: fail: v4l2-test-formats.cpp(123): found frame intervals for invalid size 47x16 fail: v4l2-test-formats.cpp(282): node->codec_mask & STATEFUL_ENCODER test VIDIOC_ENUM_FMT/FRAMESIZES/FRAMEINTERVALS: FAIL [hverkuil: drop incorrect 'For decoder devices, return -ENOTTY.' in the commit log] Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/coda/coda-common.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 4a553f42ff0a3..befc9db16c860 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1318,7 +1318,8 @@ static int coda_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *f) { struct coda_ctx *ctx = fh_to_ctx(fh); - int i; + struct coda_q_data *q_data; + const struct coda_codec *codec; if (f->index) return -EINVAL; @@ -1327,12 +1328,19 @@ static int coda_enum_frameintervals(struct file *file, void *fh, if (!ctx->vdoa && f->pixel_format == V4L2_PIX_FMT_YUYV) return -EINVAL; - for (i = 0; i < CODA_MAX_FORMATS; i++) { - if (f->pixel_format == ctx->cvd->src_formats[i] || - f->pixel_format == ctx->cvd->dst_formats[i]) - break; + if (coda_format_normalize_yuv(f->pixel_format) == V4L2_PIX_FMT_YUV420) { + q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + codec = coda_find_codec(ctx->dev, f->pixel_format, + q_data->fourcc); + } else { + codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420, + f->pixel_format); } - if (i == CODA_MAX_FORMATS) + if (!codec) + return -EINVAL; + + if (f->width < MIN_W || f->width > codec->max_w || + f->height < MIN_H || f->height > codec->max_h) return -EINVAL; f->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; -- GitLab From c16cfcac77ecc3077c516ae512b79031c76945af Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Tue, 3 May 2022 17:19:20 +0200 Subject: [PATCH 0464/4335] media: hantro: HEVC: unconditionnaly set pps_{cb/cr}_qp_offset values [ Upstream commit 46c836569196f377f87a3657b330cffaf94bd727 ] Always set pps_cb_qp_offset and pps_cr_qp_offset values in Hantro/G2 register whatever is V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT flag value. The vendor code does the same to set these values. This fixes conformance test CAINIT_G_SHARP_3. Fluster HEVC score is increase by one with this patch. Signed-off-by: Benjamin Gaignard Reviewed-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/hantro/hantro_g2_hevc_dec.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c index 340efb57fd185..ee069564205a2 100644 --- a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c +++ b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c @@ -194,13 +194,8 @@ static void set_params(struct hantro_ctx *ctx) hantro_reg_write(vpu, &g2_max_cu_qpd_depth, 0); } - if (pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT) { - hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset); - hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset); - } else { - hantro_reg_write(vpu, &g2_cb_qp_offset, 0); - hantro_reg_write(vpu, &g2_cr_qp_offset, 0); - } + hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset); + hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset); hantro_reg_write(vpu, &g2_filt_offset_beta, pps->pps_beta_offset_div2); hantro_reg_write(vpu, &g2_filt_offset_tc, pps->pps_tc_offset_div2); -- GitLab From 271bea32acc48649131bd3fe5c965d2a8ec46b4b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 May 2022 09:17:46 +0200 Subject: [PATCH 0465/4335] media: ccs-core.c: fix failure to call clk_disable_unprepare [ Upstream commit eca89cf60b040ee2cae693ea72a0364284f3084c ] Fixes smatch warning: drivers/media/i2c/ccs/ccs-core.c:1676 ccs_power_on() warn: 'sensor->ext_clk' from clk_prepare_enable() not released on lines: 1606. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ccs/ccs-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index 5363f3bcafe3d..67bb770ed63fa 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -1603,8 +1603,11 @@ static int ccs_power_on(struct device *dev) usleep_range(1000, 2000); } while (--retry); - if (!reset) - return -EIO; + if (!reset) { + dev_err(dev, "software reset failed\n"); + rval = -EIO; + goto out_cci_addr_fail; + } } if (sensor->hwcfg.i2c_addr_alt) { -- GitLab From 32c7b04d535a6e5c5174b89b12c6ae230ba8f961 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 2 May 2022 05:49:04 +0200 Subject: [PATCH 0466/4335] media: imon: reorganize serialization [ Upstream commit db264d4c66c0fe007b5d19fd007707cd0697603d ] Since usb_register_dev() from imon_init_display() from imon_probe() holds minor_rwsem while display_open() which holds driver_lock and ictx->lock is called with minor_rwsem held from usb_open(), holding driver_lock or ictx->lock when calling usb_register_dev() causes circular locking dependency problem. Since usb_deregister_dev() from imon_disconnect() holds minor_rwsem while display_open() which holds driver_lock is called with minor_rwsem held, holding driver_lock when calling usb_deregister_dev() also causes circular locking dependency problem. Sean Young explained that the problem is there are imon devices which have two usb interfaces, even though it is one device. The probe and disconnect function of both usb interfaces can run concurrently. Alan Stern responded that the driver and USB cores guarantee that when an interface is probed, both the interface and its USB device are locked. Ditto for when the disconnect callback gets run. So concurrent probing/ disconnection of multiple interfaces on the same device is not possible. Therefore, we don't need locks for handling race between imon_probe() and imon_disconnect(). But we still need to handle race between display_open() /vfd_write()/lcd_write()/display_close() and imon_disconnect(), for disconnect event can happen while file descriptors are in use. Since "struct file"->private_data is set by display_open(), vfd_write()/ lcd_write()/display_close() can assume that "struct file"->private_data is not NULL even after usb_set_intfdata(interface, NULL) was called. Replace insufficiently held driver_lock with refcount_t based management. Add a boolean flag for recording whether imon_disconnect() was already called. Use RCU for accessing this boolean flag and refcount_t. Since the boolean flag for imon_disconnect() is shared, disconnect event on either intf0 or intf1 affects both interfaces. But I assume that this change does not matter, for usually disconnect event would not happen while interfaces are in use. Link: https://syzkaller.appspot.com/bug?extid=c558267ad910fc494497 Reported-by: syzbot Signed-off-by: Tetsuo Handa Tested-by: syzbot Cc: Alan Stern Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/imon.c | 99 +++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 52 deletions(-) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 2ca4e86c7b9f1..97355e3ebdfd4 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -153,6 +153,24 @@ struct imon_context { const struct imon_usb_dev_descr *dev_descr; /* device description with key */ /* table for front panels */ + /* + * Fields for deferring free_imon_context(). + * + * Since reference to "struct imon_context" is stored into + * "struct file"->private_data, we need to remember + * how many file descriptors might access this "struct imon_context". + */ + refcount_t users; + /* + * Use a flag for telling display_open()/vfd_write()/lcd_write() that + * imon_disconnect() was already called. + */ + bool disconnected; + /* + * We need to wait for RCU grace period in order to allow + * display_open() to safely check ->disconnected and increment ->users. + */ + struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) @@ -160,18 +178,18 @@ struct imon_context { /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &vfd_write, - .release = &display_close, + .open = display_open, + .write = vfd_write, + .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &lcd_write, - .release = &display_close, + .open = display_open, + .write = lcd_write, + .release = display_close, .llseek = noop_llseek, }; @@ -439,9 +457,6 @@ static struct usb_driver imon_driver = { .id_table = imon_usb_id_table, }; -/* to prevent races between open() and disconnect(), probing, etc */ -static DEFINE_MUTEX(driver_lock); - /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); @@ -481,9 +496,11 @@ static void free_imon_context(struct imon_context *ictx) struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); + WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); + WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); - kfree(ictx); + kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } @@ -499,9 +516,6 @@ static int display_open(struct inode *inode, struct file *file) int subminor; int retval = 0; - /* prevent races with disconnect */ - mutex_lock(&driver_lock); - subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { @@ -509,13 +523,16 @@ static int display_open(struct inode *inode, struct file *file) retval = -ENODEV; goto exit; } - ictx = usb_get_intfdata(interface); - if (!ictx) { + rcu_read_lock(); + ictx = usb_get_intfdata(interface); + if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { + rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } + rcu_read_unlock(); mutex_lock(&ictx->lock); @@ -533,8 +550,10 @@ static int display_open(struct inode *inode, struct file *file) mutex_unlock(&ictx->lock); + if (retval && refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); + exit: - mutex_unlock(&driver_lock); return retval; } @@ -544,16 +563,9 @@ exit: */ static int display_close(struct inode *inode, struct file *file) { - struct imon_context *ictx = NULL; + struct imon_context *ictx = file->private_data; int retval = 0; - ictx = file->private_data; - - if (!ictx) { - pr_err("no context for device\n"); - return -ENODEV; - } - mutex_lock(&ictx->lock); if (!ictx->display_supported) { @@ -568,6 +580,8 @@ static int display_close(struct inode *inode, struct file *file) } mutex_unlock(&ictx->lock); + if (refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); return retval; } @@ -934,15 +948,12 @@ static ssize_t vfd_write(struct file *file, const char __user *buf, int offset; int seq; int retval = 0; - struct imon_context *ictx; + struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); + if (ictx->disconnected) return -ENODEV; - } mutex_lock(&ictx->lock); @@ -1018,13 +1029,10 @@ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; - struct imon_context *ictx; + struct imon_context *ictx = file->private_data; - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); + if (ictx->disconnected) return -ENODEV; - } mutex_lock(&ictx->lock); @@ -2402,7 +2410,6 @@ static int imon_probe(struct usb_interface *interface, int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; - struct imon_context *first_if_ctx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); @@ -2414,17 +2421,12 @@ static int imon_probe(struct usb_interface *interface, dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); - /* prevent races probing devices w/multiple interfaces */ - mutex_lock(&driver_lock); - first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } - first_if_ctx = usb_get_intfdata(first_if); - if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { @@ -2432,9 +2434,11 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ + struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { @@ -2448,14 +2452,13 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { - mutex_lock(&ictx->lock); - if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); @@ -2466,21 +2469,17 @@ static int imon_probe(struct usb_interface *interface, if (ictx->display_supported) imon_init_display(ictx, interface); - - mutex_unlock(&ictx->lock); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); - mutex_unlock(&driver_lock); usb_put_dev(usbdev); return 0; fail: - mutex_unlock(&driver_lock); usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); @@ -2496,10 +2495,8 @@ static void imon_disconnect(struct usb_interface *interface) struct device *dev; int ifnum; - /* prevent races with multi-interface device probing and display_open */ - mutex_lock(&driver_lock); - ictx = usb_get_intfdata(interface); + ictx->disconnected = true; dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; @@ -2540,11 +2537,9 @@ static void imon_disconnect(struct usb_interface *interface) } } - if (!ictx->dev_present_intf0 && !ictx->dev_present_intf1) + if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); - mutex_unlock(&driver_lock); - dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } -- GitLab From 35121d0bb7adde61c54a62408b5612b034e44af3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 6 May 2022 09:43:25 +0200 Subject: [PATCH 0467/4335] media: cec-adap.c: fix is_configuring state [ Upstream commit 59267fc34f4900dcd2ec3295f6be04b79aee2186 ] If an adapter is trying to claim a free logical address then it is in the 'is_configuring' state. If during that process the cable is disconnected (HPD goes low, which in turn invalidates the physical address), then cec_adap_unconfigure() is called, and that set the is_configuring boolean to false, even though the thread that's trying to claim an LA is still running. Don't touch the is_configuring bool in cec_adap_unconfigure(), it will eventually be cleared by the thread. By making that change the cec_config_log_addr() function also had to change: it was aborting if is_configuring became false (since that is what cec_adap_unconfigure() did), but that no longer works. Instead check if the physical address is invalid. That is a much more appropriate check anyway. This fixes a bug where the the adapter could be disabled even though the device was still configuring. This could cause POLL transmits to time out. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/core/cec-adap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 1f599e300e42e..67776a0d31e8c 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1272,7 +1272,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, * While trying to poll the physical address was reset * and the adapter was unconfigured, so bail out. */ - if (!adap->is_configuring) + if (adap->phys_addr == CEC_PHYS_ADDR_INVALID) return -EINTR; if (err) @@ -1329,7 +1329,6 @@ static void cec_adap_unconfigure(struct cec_adapter *adap) adap->phys_addr != CEC_PHYS_ADDR_INVALID) WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID)); adap->log_addrs.log_addr_mask = 0; - adap->is_configuring = false; adap->is_configured = false; cec_flush(adap); wake_up_interruptible(&adap->kthread_waitq); @@ -1521,9 +1520,10 @@ unconfigure: for (i = 0; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; cec_adap_unconfigure(adap); + adap->is_configuring = false; adap->kthread_config = NULL; - mutex_unlock(&adap->lock); complete(&adap->config_completion); + mutex_unlock(&adap->lock); return 0; } -- GitLab From 6d5deb242874d924beccf7eb3cef04c1c3b0da79 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 12 May 2022 10:42:01 +0200 Subject: [PATCH 0468/4335] usbnet: Run unregister_netdev() before unbind() again [ Upstream commit d1408f6b4dd78fb1b9e26bcf64477984e5f85409 ] Commit 2c9d6c2b871d ("usbnet: run unbind() before unregister_netdev()") sought to fix a use-after-free on disconnect of USB Ethernet adapters. It turns out that a different fix is necessary to address the issue: https://lore.kernel.org/netdev/18b3541e5372bc9b9fc733d422f4e698c089077c.1650177997.git.lukas@wunner.de/ So the commit was not necessary. The commit made binding and unbinding of USB Ethernet asymmetrical: Before, usbnet_probe() first invoked the ->bind() callback and then register_netdev(). usbnet_disconnect() mirrored that by first invoking unregister_netdev() and then ->unbind(). Since the commit, the order in usbnet_disconnect() is reversed and no longer mirrors usbnet_probe(). One consequence is that a PHY disconnected (and stopped) in ->unbind() is afterwards stopped once more by unregister_netdev() as it closes the netdev before unregistering. That necessitates a contortion in ->stop() because the PHY may only be stopped if it hasn't already been disconnected. Reverting the commit allows making the call to phy_stop() unconditional in ->stop(). Tested-by: Oleksij Rempel # LAN9514/9512/9500 Tested-by: Ferry Toth # LAN9514 Signed-off-by: Lukas Wunner Acked-by: Oliver Neukum Cc: Martyn Welch Cc: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_devices.c | 6 +----- drivers/net/usb/smsc95xx.c | 3 +-- drivers/net/usb/usbnet.c | 6 +++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index bd8f8619ad6f2..396505396a2e4 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -799,11 +799,7 @@ static int ax88772_stop(struct usbnet *dev) { struct asix_common_private *priv = dev->driver_priv; - /* On unplugged USB, we will get MDIO communication errors and the - * PHY will be set in to PHY_HALTED state. - */ - if (priv->phydev->state != PHY_HALTED) - phy_stop(priv->phydev); + phy_stop(priv->phydev); return 0; } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index eb0d325e92b76..4e39e43450848 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1217,8 +1217,7 @@ static int smsc95xx_start_phy(struct usbnet *dev) static int smsc95xx_stop(struct usbnet *dev) { - if (dev->net->phydev) - phy_stop(dev->net->phydev); + phy_stop(dev->net->phydev); return 0; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index a33d7fb82a00b..af2bbaff24782 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1614,9 +1614,6 @@ void usbnet_disconnect (struct usb_interface *intf) xdev->bus->bus_name, xdev->devpath, dev->driver_info->description); - if (dev->driver_info->unbind) - dev->driver_info->unbind(dev, intf); - net = dev->net; unregister_netdev (net); @@ -1624,6 +1621,9 @@ void usbnet_disconnect (struct usb_interface *intf) usb_scuttle_anchored_urbs(&dev->deferred); + if (dev->driver_info->unbind) + dev->driver_info->unbind(dev, intf); + usb_kill_urb(dev->interrupt); usb_free_urb(dev->interrupt); kfree(dev->padding_pkt); -- GitLab From 48d32e41bbfe3cfa5a9811e3a1959e628e196391 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Apr 2022 21:11:41 +0200 Subject: [PATCH 0469/4335] openrisc: start CPU timer early in boot [ Upstream commit 516dd4aacd67a0f27da94f3fe63fe0f4dbab6e2b ] In order to measure the boot process, the timer should be switched on as early in boot as possible. As well, the commit defines the get_cycles macro, like the previous patches in this series, so that generic code is aware that it's implemented by the platform, as is done on other archs. Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Jonas Bonn Cc: Stefan Kristiansson Acked-by: Stafford Horne Reported-by: Guenter Roeck Signed-off-by: Jason A. Donenfeld Signed-off-by: Sasha Levin --- arch/openrisc/include/asm/timex.h | 1 + arch/openrisc/kernel/head.S | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h index d52b4e536e3f9..5487fa93dd9be 100644 --- a/arch/openrisc/include/asm/timex.h +++ b/arch/openrisc/include/asm/timex.h @@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void) { return mfspr(SPR_TTCR); } +#define get_cycles get_cycles /* This isn't really used any more */ #define CLOCK_TICK_RATE 1000 diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 15f1b38dfe03b..871f4c8588595 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -521,6 +521,15 @@ _start: l.ori r3,r0,0x1 l.mtspr r0,r3,SPR_SR + /* + * Start the TTCR as early as possible, so that the RNG can make use of + * measurements of boot time from the earliest opportunity. Especially + * important is that the TTCR does not return zero by the time we reach + * rand_initialize(). + */ + l.movhi r3,hi(SPR_TTMR_CR) + l.mtspr r0,r3,SPR_TTMR + CLEAR_GPR(r1) CLEAR_GPR(r2) CLEAR_GPR(r3) -- GitLab From 906c81dba8ee8057523859b5e1a2479e9fd34860 Mon Sep 17 00:00:00 2001 From: "Smith, Kyle Miller (Nimble Kernel)" Date: Fri, 22 Apr 2022 14:40:32 +0000 Subject: [PATCH 0470/4335] nvme-pci: fix a NULL pointer dereference in nvme_alloc_admin_tags [ Upstream commit da42761181627e9bdc37d18368b827948a583929 ] In nvme_alloc_admin_tags, the admin_q can be set to an error (typically -ENOMEM) if the blk_mq_init_queue call fails to set up the queue, which is checked immediately after the call. However, when we return the error message up the stack, to nvme_reset_work the error takes us to nvme_remove_dead_ctrl() nvme_dev_disable() nvme_suspend_queue(&dev->queues[0]). Here, we only check that the admin_q is non-NULL, rather than not an error or NULL, and begin quiescing a queue that never existed, leading to bad / NULL pointer dereference. Signed-off-by: Kyle Smith Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e6f55cf6e494a..3ddd24a42043d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1680,6 +1680,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { -- GitLab From 061a6159cea583f1155f67d1915917a6b9282662 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 16 May 2022 17:20:35 +0800 Subject: [PATCH 0471/4335] ASoC: rt5645: Fix errorenous cleanup order [ Upstream commit 2def44d3aec59e38d2701c568d65540783f90f2f ] There is a logic error when removing rt5645 device as the function rt5645_i2c_remove() first cancel the &rt5645->jack_detect_work and delete the &rt5645->btn_check_timer latter. However, since the timer handler rt5645_btn_check_callback() will re-queue the jack_detect_work, this cleanup order is buggy. That is, once the del_timer_sync in rt5645_i2c_remove is concurrently run with the rt5645_btn_check_callback, the canceled jack_detect_work will be rescheduled again, leading to possible use-after-free. This patch fix the issue by placing the del_timer_sync function before the cancel_delayed_work_sync. Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20220516092035.28283-1-linma@zju.edu.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 9408ee63cb268..8ea6d43335626 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -4154,9 +4154,14 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) if (i2c->irq) free_irq(i2c->irq, rt5645); + /* + * Since the rt5645_btn_check_callback() can queue jack_detect_work, + * the timer need to be delted first + */ + del_timer_sync(&rt5645->btn_check_timer); + cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); - del_timer_sync(&rt5645->btn_check_timer); regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); -- GitLab From c2673b86b4195260ba41de97fc9c10d17250ff8e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Tue, 22 Mar 2022 16:06:39 +0800 Subject: [PATCH 0472/4335] nbd: Fix hung on disconnect request if socket is closed before [ Upstream commit 491bf8f236fdeec698fa6744993f1ecf3fafd1a5 ] When userspace closes the socket before sending a disconnect request, the following I/O requests will be blocked in wait_for_reconnect() until dead timeout. This will cause the following disconnect request also hung on blk_mq_quiesce_queue(). That means we have no way to disconnect a nbd device if there are some I/O requests waiting for reconnecting until dead timeout. It's not expected. So let's wake up the thread waiting for reconnecting directly when a disconnect request is sent. Reported-by: Xu Jianhai Signed-off-by: Xie Yongji Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20220322080639.142-1-xieyongji@bytedance.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 582b23befb5c4..8704212482e54 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -896,11 +896,15 @@ static int wait_for_reconnect(struct nbd_device *nbd) struct nbd_config *config = nbd->config; if (!config->dead_conn_timeout) return 0; - if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags)) + + if (!wait_event_timeout(config->conn_wait, + test_bit(NBD_RT_DISCONNECTED, + &config->runtime_flags) || + atomic_read(&config->live_connections) > 0, + config->dead_conn_timeout)) return 0; - return wait_event_timeout(config->conn_wait, - atomic_read(&config->live_connections) > 0, - config->dead_conn_timeout) > 0; + + return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags); } static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) @@ -2026,6 +2030,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); sock_shutdown(nbd); + wake_up(&nbd->config->conn_wait); /* * Make sure recv thread has finished, we can safely call nbd_clear_que() * to cancel the inflight I/Os. -- GitLab From c525d3385f6d34d6dcce89ebe019314d83f23873 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 11 May 2022 16:06:12 +0530 Subject: [PATCH 0473/4335] drm/amd/pm: update smartshift powerboost calc for smu12 [ Upstream commit 138292f1dc00e7e0724f44769f9da39cf2f3bf0b ] smartshift apu and dgpu power boost are reported as percentage with respect to their power limits. This value[0-100] reflects the boost for the respective device. Signed-off-by: Sathishkumar S Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 60 ++++++++++++++----- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 145f13b8c977d..138466081875c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1127,6 +1127,39 @@ static int renoir_get_power_profile_mode(struct smu_context *smu, return size; } +static void renoir_get_ss_power_percent(SmuMetrics_t *metrics, + uint32_t *apu_percent, uint32_t *dgpu_percent) +{ + uint32_t apu_boost = 0; + uint32_t dgpu_boost = 0; + uint16_t apu_limit = 0; + uint16_t dgpu_limit = 0; + uint16_t apu_power = 0; + uint16_t dgpu_power = 0; + + apu_power = metrics->ApuPower; + apu_limit = metrics->StapmOriginalLimit; + if (apu_power > apu_limit && apu_limit != 0) + apu_boost = ((apu_power - apu_limit) * 100) / apu_limit; + apu_boost = (apu_boost > 100) ? 100 : apu_boost; + + dgpu_power = metrics->dGpuPower; + if (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit) + dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOriginalLimit; + if (dgpu_power > dgpu_limit && dgpu_limit != 0) + dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit; + dgpu_boost = (dgpu_boost > 100) ? 100 : dgpu_boost; + + if (dgpu_boost >= apu_boost) + apu_boost = 0; + else + dgpu_boost = 0; + + *apu_percent = apu_boost; + *dgpu_percent = dgpu_boost; +} + + static int renoir_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) @@ -1135,6 +1168,9 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; int ret = 0; + uint32_t apu_percent = 0; + uint32_t dgpu_percent = 0; + mutex_lock(&smu->metrics_lock); @@ -1183,26 +1219,18 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, *value = metrics->Voltage[1]; break; case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of APU power boost + * with respect to APU's power limit. */ - if (metrics->StapmOriginalLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOriginalLimit; - else - *value = 0; + renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = apu_percent; break; case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of dGPU power boost + * with respect to dGPU's power limit. */ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOriginalLimit); - else - *value = 0; + renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = dgpu_percent; break; default: *value = UINT_MAX; -- GitLab From 5005002b2e12e9ac4b58e59354d38a4106906f87 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 11 May 2022 16:35:59 +0530 Subject: [PATCH 0474/4335] drm/amd/pm: update smartshift powerboost calc for smu13 [ Upstream commit cdf4c8ec39872a61a58d62f19b4db80f0f7bc586 ] smartshift apu and dgpu power boost are reported as percentage with respect to their power limits. adjust the units of power before calculating the percentage of boost. Signed-off-by: Sathishkumar S Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 62 ++++++++++++++----- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 0e1a843608e43..33bd5430c6dee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -305,6 +305,42 @@ static int yellow_carp_mode2_reset(struct smu_context *smu) return yellow_carp_mode_reset(smu, SMU_RESET_MODE_2); } + +static void yellow_carp_get_ss_power_percent(SmuMetrics_t *metrics, + uint32_t *apu_percent, uint32_t *dgpu_percent) +{ + uint32_t apu_boost = 0; + uint32_t dgpu_boost = 0; + uint16_t apu_limit = 0; + uint16_t dgpu_limit = 0; + uint16_t apu_power = 0; + uint16_t dgpu_power = 0; + + /* APU and dGPU power values are reported in milli Watts + * and STAPM power limits are in Watts */ + apu_power = metrics->ApuPower/1000; + apu_limit = metrics->StapmOpnLimit; + if (apu_power > apu_limit && apu_limit != 0) + apu_boost = ((apu_power - apu_limit) * 100) / apu_limit; + apu_boost = (apu_boost > 100) ? 100 : apu_boost; + + dgpu_power = metrics->dGpuPower/1000; + if (metrics->StapmCurrentLimit > metrics->StapmOpnLimit) + dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOpnLimit; + if (dgpu_power > dgpu_limit && dgpu_limit != 0) + dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit; + dgpu_boost = (dgpu_boost > 100) ? 100 : dgpu_boost; + + if (dgpu_boost >= apu_boost) + apu_boost = 0; + else + dgpu_boost = 0; + + *apu_percent = apu_boost; + *dgpu_percent = dgpu_boost; + +} + static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) @@ -313,6 +349,8 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; int ret = 0; + uint32_t apu_percent = 0; + uint32_t dgpu_percent = 0; mutex_lock(&smu->metrics_lock); @@ -365,26 +403,18 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, *value = metrics->Voltage[1]; break; case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of APU power boost + * with respect to APU's power limit. */ - if (metrics->StapmOpnLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOpnLimit; - else - *value = 0; + yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = apu_percent; break; case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of dGPU power boost + * with respect to dGPU's power limit. */ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOpnLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOpnLimit); - else - *value = 0; + yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = dgpu_percent; break; default: *value = UINT_MAX; -- GitLab From 660dfa033ccc9afb032015b6dc76e846bba42cfb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 13 May 2022 08:46:12 -0300 Subject: [PATCH 0475/4335] net: phy: micrel: Allow probing without .driver_data [ Upstream commit f2ef6f7539c68c6bd6c32323d8845ee102b7c450 ] Currently, if the .probe element is present in the phy_driver structure and the .driver_data is not, a NULL pointer dereference happens. Allow passing .probe without .driver_data by inserting NULL checks for priv->type. Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220513114613.762810-1-festevam@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/micrel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 64d829ed98876..05a8985d71073 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -335,7 +335,7 @@ static int kszphy_config_reset(struct phy_device *phydev) } } - if (priv->led_mode >= 0) + if (priv->type && priv->led_mode >= 0) kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode); return 0; @@ -351,10 +351,10 @@ static int kszphy_config_init(struct phy_device *phydev) type = priv->type; - if (type->has_broadcast_disable) + if (type && type->has_broadcast_disable) kszphy_broadcast_disable(phydev); - if (type->has_nand_tree_disable) + if (type && type->has_nand_tree_disable) kszphy_nand_tree_disable(phydev); return kszphy_config_reset(phydev); @@ -1328,7 +1328,7 @@ static int kszphy_probe(struct phy_device *phydev) priv->type = type; - if (type->led_mode_reg) { + if (type && type->led_mode_reg) { ret = of_property_read_u32(np, "micrel,led-mode", &priv->led_mode); if (ret) @@ -1349,7 +1349,8 @@ static int kszphy_probe(struct phy_device *phydev) unsigned long rate = clk_get_rate(clk); bool rmii_ref_clk_sel_25_mhz; - priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; + if (type) + priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; rmii_ref_clk_sel_25_mhz = of_property_read_bool(np, "micrel,rmii-reference-clock-select-25-mhz"); -- GitLab From 16cae5d8580263c71a12b837247ce676baf7014f Mon Sep 17 00:00:00 2001 From: Kwanghoon Son Date: Wed, 27 Apr 2022 03:16:45 +0200 Subject: [PATCH 0476/4335] media: exynos4-is: Fix compile warning [ Upstream commit e080f5c1f2b6d02c02ee5d674e0e392ccf63bbaf ] Declare static on function 'fimc_isp_video_device_unregister'. When VIDEO_EXYNOS4_ISP_DMA_CAPTURE=n, compiler warns about warning: no previous prototype for function [-Wmissing-prototypes] Reported-by: kernel test robot Signed-off-by: Kwanghoon Son Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/exynos4-is/fimc-isp-video.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.h b/drivers/media/platform/exynos4-is/fimc-isp-video.h index edcb3a5e3cb90..2dd4ddbc748a1 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.h +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.h @@ -32,7 +32,7 @@ static inline int fimc_isp_video_device_register(struct fimc_isp *isp, return 0; } -void fimc_isp_video_device_unregister(struct fimc_isp *isp, +static inline void fimc_isp_video_device_unregister(struct fimc_isp *isp, enum v4l2_buf_type type) { } -- GitLab From bdd5770169438fca5575adf7945fb7d0b362610c Mon Sep 17 00:00:00 2001 From: Nicolas Dufresne Date: Fri, 13 May 2022 22:29:19 +0200 Subject: [PATCH 0477/4335] media: hantro: Stop using H.264 parameter pic_num [ Upstream commit 831410700909f4e29d5af1ef26b8c59fc2d1988e ] The hardware expects FrameNumWrap or long_term_frame_idx. Picture numbers are per field, and are mostly used during the memory management process, which is done in userland. This fixes two ITU conformance tests: - MR6_BT_B - MR8_BT_B Signed-off-by: Nicolas Dufresne Reviewed-by: Sebastian Fricke Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/hantro/hantro_h264.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index 0b4d2491be3b8..228629fb3cdf9 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -354,8 +354,6 @@ u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx) if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) return 0; - if (dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - return dpb->pic_num; return dpb->frame_num; } -- GitLab From f8ceb9745ba71bac088b459b4689d77515cc2800 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 17 May 2022 12:26:46 -0500 Subject: [PATCH 0478/4335] ASoC: max98357a: remove dependency on GPIOLIB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 21ca3274333f5c1cbbf9d91e5b33f4f2463859b2 ] commit dcc2c012c7691 ("ASoC: Fix gpiolib dependencies") removed a series of unnecessary dependencies on GPIOLIB when the gpio was optional. A similar simplification seems valid for max98357a, so remove the dependency as well. This will avoid the following warning WARNING: unmet direct dependencies detected for SND_SOC_MAX98357A Depends on [n]: SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && GPIOLIB [=n] Selected by [y]: - SND_SOC_INTEL_SOF_CS42L42_MACH [=y] && SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && SND_SOC_INTEL_MACH [=y] && (SND_SOC_SOF_HDA_LINK [=y] || SND_SOC_SOF_BAYTRAIL [=n]) && I2C [=y] && ACPI [=y] && SND_HDA_CODEC_HDMI [=y] && SND_SOC_SOF_HDA_AUDIO_CODEC [=y] && (MFD_INTEL_LPSS [=y] || COMPILE_TEST [=n]) Reported-by: kernel test robot Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220517172647.468244-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 47e675e8bd006..2cbf4fc0f6757 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -900,7 +900,6 @@ config SND_SOC_MAX98095 config SND_SOC_MAX98357A tristate "Maxim MAX98357A CODEC" - depends on GPIOLIB config SND_SOC_MAX98371 tristate -- GitLab From 649689f49ad95d19b121c21e74eaa1fabb140c24 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 17 May 2022 12:26:47 -0500 Subject: [PATCH 0479/4335] ASoC: rt1015p: remove dependency on GPIOLIB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b390c25c6757b9d56cecdfbf6d55f15fc89a6386 ] commit dcc2c012c7691 ("ASoC: Fix gpiolib dependencies") removed a series of unnecessary dependencies on GPIOLIB when the gpio was optional. A similar simplification seems valid for rt1015p, so remove the dependency as well. This will avoid the following warning WARNING: unmet direct dependencies detected for SND_SOC_RT1015P Depends on [n]: SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && GPIOLIB [=n] Selected by [y]: - SND_SOC_INTEL_SOF_RT5682_MACH [=y] && SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && SND_SOC_INTEL_MACH [=y] && (SND_SOC_SOF_HDA_LINK [=y] || SND_SOC_SOF_BAYTRAIL [=n]) && I2C [=y] && ACPI [=y] && (SND_HDA_CODEC_HDMI [=y] && SND_SOC_SOF_HDA_AUDIO_CODEC [=y] && (MFD_INTEL_LPSS [=y] || COMPILE_TEST [=y]) || SND_SOC_SOF_BAYTRAIL [=n] && (X86_INTEL_LPSS [=n] || COMPILE_TEST [=y])) Reported-by: kernel test robot Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220517172647.468244-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 2cbf4fc0f6757..d59a7e99ce42a 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1148,7 +1148,6 @@ config SND_SOC_RT1015 config SND_SOC_RT1015P tristate - depends on GPIOLIB config SND_SOC_RT1019 tristate -- GitLab From d9c96e05574f22074f0c8ec108372a8dfecbd3fe Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 18 May 2022 11:08:59 +0200 Subject: [PATCH 0480/4335] ACPI: CPPC: Assume no transition latency if no PCCT [ Upstream commit 6380b7b2b29da9d9c5ab2d4a265901cd93ba3696 ] The transition_delay_us (struct cpufreq_policy) is currently defined as: Preferred average time interval between consecutive invocations of the driver to set the frequency for this policy. To be set by the scaling driver (0, which is the default, means no preference). The transition_latency represents the amount of time necessary for a CPU to change its frequency. A PCCT table advertises mutliple values: - pcc_nominal: Expected latency to process a command, in microseconds - pcc_mpar: The maximum number of periodic requests that the subspace channel can support, reported in commands per minute. 0 indicates no limitation. - pcc_mrtt: The minimum amount of time that OSPM must wait after the completion of a command before issuing the next command, in microseconds. cppc_get_transition_latency() allows to get the max of them. commit d4f3388afd48 ("cpufreq / CPPC: Set platform specific transition_delay_us") allows to select transition_delay_us based on the platform, and fallbacks to cppc_get_transition_latency() otherwise. If _CPC objects are not using PCC channels (no PPCT table), the transition_delay_us is set to CPUFREQ_ETERNAL, leading to really long periods between frequency updates (~4s). If the desired_reg, where performance requests are written, is in SystemMemory or SystemIo ACPI address space, there is no delay in requests. So return 0 instead of CPUFREQ_ETERNAL, leading to transition_delay_us being set to LATENCY_MULTIPLIER us (1000 us). This patch also adds two macros to check the address spaces. Signed-off-by: Pierre Gondois Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/cppc_acpi.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 71b87c16f92da..ed13410306845 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); (cpc)->cpc_entry.reg.space_id == \ ACPI_ADR_SPACE_PLATFORM_COMM) +/* Check if a CPC register is in SystemMemory */ +#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_MEMORY) + +/* Check if a CPC register is in SystemIo */ +#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_IO) + /* Evaluates to True if reg is a NULL register descriptor */ #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ (reg)->address == 0 && \ @@ -1378,6 +1388,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf); * transition latency for performance change requests. The closest we have * is the timing information from the PCCT tables which provides the info * on the number and frequency of PCC commands the platform can handle. + * + * If desired_reg is in the SystemMemory or SystemIo ACPI address space, + * then assume there is no latency. */ unsigned int cppc_get_transition_latency(int cpu_num) { @@ -1403,7 +1416,9 @@ unsigned int cppc_get_transition_latency(int cpu_num) return CPUFREQ_ETERNAL; desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; - if (!CPC_IN_PCC(desired_reg)) + if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg)) + return 0; + else if (!CPC_IN_PCC(desired_reg)) return CPUFREQ_ETERNAL; if (pcc_ss_id < 0) -- GitLab From 980b79d0be5616b67880582eef620c226c709169 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 18 May 2022 08:51:38 -0700 Subject: [PATCH 0481/4335] nvme: set non-mdts limits in nvme_scan_work [ Upstream commit 78288665b5d0154978fed431985310cb4f166836 ] In current implementation we set the non-mdts limits by calling nvme_init_non_mdts_limits() from nvme_init_ctrl_finish(). This also tries to set the limits for the discovery controller which has no I/O queues resulting in the warning message reported by the nvme_log_error() when running blktest nvme/002: - [ 2005.155946] run blktests nvme/002 at 2022-04-09 16:57:47 [ 2005.192223] loop: module loaded [ 2005.196429] nvmet: adding nsid 1 to subsystem blktests-subsystem-0 [ 2005.200334] nvmet: adding nsid 1 to subsystem blktests-subsystem-1 <------------------------------SNIP----------------------------------> [ 2008.958108] nvmet: adding nsid 1 to subsystem blktests-subsystem-997 [ 2008.962082] nvmet: adding nsid 1 to subsystem blktests-subsystem-998 [ 2008.966102] nvmet: adding nsid 1 to subsystem blktests-subsystem-999 [ 2008.973132] nvmet: creating discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN testhostnqn. *[ 2008.973196] nvme1: Identify(0x6), Invalid Field in Command (sct 0x0 / sc 0x2) MORE DNR* [ 2008.974595] nvme nvme1: new ctrl: "nqn.2014-08.org.nvmexpress.discovery" [ 2009.103248] nvme nvme1: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" Move the call of nvme_init_non_mdts_limits() to nvme_scan_work() after we verify that I/O queues are created since that is a converging point for each transport where these limits are actually used. 1. FC : nvme_fc_create_association() ... nvme_fc_create_io_queues(ctrl); ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 2. PCIe:- nvme_reset_work() ... nvme_setup_io_queues() nvme_create_io_queues() nvme_alloc_queue() ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 3. RDMA :- nvme_rdma_setup_ctrl ... nvme_rdma_configure_io_queues ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 4. TCP :- nvme_tcp_setup_ctrl ... nvme_tcp_configure_io_queues ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() * nvme_scan_work() ... nvme_validate_or_alloc_ns() nvme_alloc_ns() nvme_update_ns_info() nvme_update_disk_info() nvme_config_discard() <--- blk_queue_max_write_zeroes_sectors() <--- Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 87877397d1add..711b89424bd06 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3032,10 +3032,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_init_non_mdts_limits(ctrl); - if (ret < 0) - return ret; - ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -4096,11 +4092,26 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); + int ret; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; + /* + * Identify controller limits can change at controller reset due to + * new firmware download, even though it is not common we cannot ignore + * such scenario. Controller's non-mdts limits are reported in the unit + * of logical blocks that is dependent on the format of attached + * namespace. Hence re-read the limits at the time of ns allocation. + */ + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) { + dev_warn(ctrl->device, + "reading non-mdts-limits failed: %d\n", ret); + return; + } + if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); -- GitLab From 28beef47f18bc371022461c24b20efc68be4c992 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 18 May 2022 20:43:57 +0900 Subject: [PATCH 0482/4335] can: mcp251xfd: silence clang's -Wunaligned-access warning [ Upstream commit 1a6dd9996699889313327be03981716a8337656b ] clang emits a -Wunaligned-access warning on union mcp251xfd_tx_ojb_load_buf. The reason is that field hw_tx_obj (not declared as packed) is being packed right after a 16 bits field inside a packed struct: | union mcp251xfd_tx_obj_load_buf { | struct __packed { | struct mcp251xfd_buf_cmd cmd; | /* ^ 16 bits fields */ | struct mcp251xfd_hw_tx_obj_raw hw_tx_obj; | /* ^ not declared as packed */ | } nocrc; | struct __packed { | struct mcp251xfd_buf_cmd_crc cmd; | struct mcp251xfd_hw_tx_obj_raw hw_tx_obj; | __be16 crc; | } crc; | } ____cacheline_aligned; Starting from LLVM 14, having an unpacked struct nested in a packed struct triggers a warning. c.f. [1]. This is a false positive because the field is always being accessed with the relevant put_unaligned_*() function. Adding __packed to the structure declaration silences the warning. [1] https://github.com/llvm/llvm-project/issues/55520 Link: https://lore.kernel.org/all/20220518114357.55452-1-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Reported-by: kernel test robot Tested-by: Nathan Chancellor # build Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 0f322dabaf651..281856eea2ef8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -433,7 +433,7 @@ struct mcp251xfd_hw_tef_obj { /* The tx_obj_raw version is used in spi async, i.e. without * regmap. We have to take care of endianness ourselves. */ -struct mcp251xfd_hw_tx_obj_raw { +struct __packed mcp251xfd_hw_tx_obj_raw { __le32 id; __le32 flags; u8 data[sizeof_field(struct canfd_frame, data)]; -- GitLab From 6f516847427c186f4c71a369400379ff1a589ebf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 19 May 2022 16:59:13 +0200 Subject: [PATCH 0483/4335] x86/microcode: Add explicit CPU vendor dependency [ Upstream commit 9c55d99e099bd7aa6b91fce8718505c35d5dfc65 ] Add an explicit dependency to the respective CPU vendor so that the respective microcode support for it gets built only when that support is enabled. Reported-by: Randy Dunlap Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/8ead0da9-9545-b10d-e3db-7df1a1f219e4@infradead.org Signed-off-by: Sasha Levin --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d02b04d300966..1d0f16b533937 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1323,7 +1323,7 @@ config MICROCODE config MICROCODE_INTEL bool "Intel microcode loading support" - depends on MICROCODE + depends on CPU_SUP_INTEL && MICROCODE default MICROCODE help This options enables microcode patch loading support for Intel @@ -1335,7 +1335,7 @@ config MICROCODE_INTEL config MICROCODE_AMD bool "AMD microcode loading support" - depends on MICROCODE + depends on CPU_SUP_AMD && MICROCODE help If you select this option, microcode patch loading support for AMD processors will be enabled. -- GitLab From d295f28cb6ebf3668bfd7750e4209ee6a4717b74 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 19 May 2022 10:12:13 -0500 Subject: [PATCH 0484/4335] net: ipa: ignore endianness if there is no header [ Upstream commit 332ef7c814bdd60f08d0d9013d0e1104798b2d23 ] If we program an RX endpoint to have no header (header length is 0), header-related endpoint configuration values are meaningless and are ignored. The only case we support that defines a header is QMAP endpoints. In ipa_endpoint_init_hdr_ext() we set the endianness mask value unconditionally, but it should not be done if there is no header (meaning it is not configured for QMAP). Set the endianness conditionally, and rearrange the logic in that function slightly to avoid testing the qmap flag twice. Delete an incorrect comment in ipa_endpoint_init_aggr(). Signed-off-by: Alex Elder Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ipa/ipa_endpoint.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 477eb4051bed7..703e1630f9c26 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -570,19 +570,23 @@ static void ipa_endpoint_init_hdr_ext(struct ipa_endpoint *endpoint) struct ipa *ipa = endpoint->ipa; u32 val = 0; - val |= HDR_ENDIANNESS_FMASK; /* big endian */ - - /* A QMAP header contains a 6 bit pad field at offset 0. The RMNet - * driver assumes this field is meaningful in packets it receives, - * and assumes the header's payload length includes that padding. - * The RMNet driver does *not* pad packets it sends, however, so - * the pad field (although 0) should be ignored. - */ - if (endpoint->data->qmap && !endpoint->toward_ipa) { - val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK; - /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */ - val |= HDR_PAYLOAD_LEN_INC_PADDING_FMASK; - /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */ + if (endpoint->data->qmap) { + /* We have a header, so we must specify its endianness */ + val |= HDR_ENDIANNESS_FMASK; /* big endian */ + + /* A QMAP header contains a 6 bit pad field at offset 0. + * The RMNet driver assumes this field is meaningful in + * packets it receives, and assumes the header's payload + * length includes that padding. The RMNet driver does + * *not* pad packets it sends, however, so the pad field + * (although 0) should be ignored. + */ + if (!endpoint->toward_ipa) { + val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK; + /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */ + val |= HDR_PAYLOAD_LEN_INC_PADDING_FMASK; + /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */ + } } /* HDR_PAYLOAD_LEN_INC_PADDING is 0 */ @@ -740,8 +744,6 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint) close_eof = endpoint->data->rx.aggr_close_eof; val |= aggr_sw_eof_active_encoded(version, close_eof); - - /* AGGR_HARD_BYTE_LIMIT_ENABLE is 0 */ } else { val |= u32_encode_bits(IPA_ENABLE_DEAGGR, AGGR_EN_FMASK); -- GitLab From 16195406211b0eadd5ec6cd10fae4a9d8ea1f987 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 May 2022 16:32:30 +0200 Subject: [PATCH 0485/4335] m68k: atari: Make Atari ROM port I/O write macros return void [ Upstream commit 30b5e6ef4a32ea4985b99200e06d6660a69f9246 ] The macros implementing Atari ROM port I/O writes do not cast away their output, unlike similar implementations for other I/O buses. When they are combined using conditional expressions in the definitions of outb() and friends, this triggers sparse warnings like: drivers/net/appletalk/cops.c:382:17: error: incompatible types in conditional expression (different base types): drivers/net/appletalk/cops.c:382:17: unsigned char drivers/net/appletalk/cops.c:382:17: void Fix this by adding casts to "void". Reported-by: kernel test robot Reported-by: Guenter Roeck Signed-off-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Reviewed-by: Michael Schmitz Link: https://lore.kernel.org/r/c15bedc83d90a14fffcd5b1b6bfb32b8a80282c5.1653057096.git.geert@linux-m68k.org Signed-off-by: Sasha Levin --- arch/m68k/include/asm/raw_io.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 80eb2396d01eb..3ba40bc1dfaa9 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -80,14 +80,14 @@ ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; }) #define rom_out_8(addr, b) \ - ({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ + (void)({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u8 *) ((_addr | 0x10000) + (__v<<1)))); }) #define rom_out_be16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); }) #define rom_out_le16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); }) -- GitLab From a6e80df4f02cacadf17b2cb564cb5bb1adf0b05d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:41 +0100 Subject: [PATCH 0486/4335] rxrpc: Return an error to sendmsg if call failed [ Upstream commit 4ba68c5192554876bd8c3afd904e3064d2915341 ] If at the end of rxrpc sendmsg() or rxrpc_kernel_send_data() the call that was being given data was aborted remotely or otherwise failed, return an error rather than returning the amount of data buffered for transmission. The call (presumably) did not complete, so there's not much point continuing with it. AF_RXRPC considers it "complete" and so will be unwilling to do anything else with it - and won't send a notification for it, deeming the return from sendmsg sufficient. Not returning an error causes afs to incorrectly handle a StoreData operation that gets interrupted by a change of address due to NAT reconfiguration. This doesn't normally affect most operations since their request parameters tend to fit into a single UDP packet and afs_make_call() returns before the server responds; StoreData is different as it involves transmission of a lot of data. This can be triggered on a client by doing something like: dd if=/dev/zero of=/afs/example.com/foo bs=1M count=512 at one prompt, and then changing the network address at another prompt, e.g.: ifconfig enp6s0 inet 192.168.6.2 && route add 192.168.6.1 dev enp6s0 Tracing packets on an Auristor fileserver looks something like: 192.168.6.1 -> 192.168.6.3 RX 107 ACK Idle Seq: 0 Call: 4 Source Port: 7000 Destination Port: 7001 192.168.6.3 -> 192.168.6.1 AFS (RX) 1482 FS Request: Unknown(64538) (64538) 192.168.6.3 -> 192.168.6.1 AFS (RX) 1482 FS Request: Unknown(64538) (64538) 192.168.6.1 -> 192.168.6.3 RX 107 ACK Idle Seq: 0 Call: 4 Source Port: 7000 Destination Port: 7001 192.168.6.2 -> 192.168.6.1 AFS (RX) 1482 FS Request: Unknown(0) (0) 192.168.6.2 -> 192.168.6.1 AFS (RX) 1482 FS Request: Unknown(0) (0) 192.168.6.1 -> 192.168.6.2 RX 107 ACK Exceeds Window Seq: 0 Call: 4 Source Port: 7000 Destination Port: 7001 192.168.6.1 -> 192.168.6.2 RX 74 ABORT Seq: 0 Call: 4 Source Port: 7000 Destination Port: 7001 192.168.6.1 -> 192.168.6.2 RX 74 ABORT Seq: 29321 Call: 4 Source Port: 7000 Destination Port: 7001 The Auristor fileserver logs code -453 (RXGEN_SS_UNMARSHAL), but the abort code received by kafs is -5 (RX_PROTOCOL_ERROR) as the rx layer sees the condition and generates an abort first and the unmarshal error is a consequence of that at the application layer. Reported-by: Marc Dionne Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2021-December/004810.html # v1 Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/sendmsg.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index af8ad6c30b9fb..1d38e279e2efa 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -444,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } out: call->tx_pending = skb; _leave(" = %d", ret); -- GitLab From 65b578726d48ee0941f324bb9a1dd15101cbec84 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:48 +0100 Subject: [PATCH 0487/4335] rxrpc, afs: Fix selection of abort codes [ Upstream commit de696c4784f0706884458893c5a6c39b3a3ff65c ] The RX_USER_ABORT code should really only be used to indicate that the user of the rxrpc service (ie. userspace) implicitly caused a call to be aborted - for instance if the AF_RXRPC socket is closed whilst the call was in progress. (The user may also explicitly abort a call and specify the abort code to use). Change some of the points of generation to use other abort codes instead: (1) Abort the call with RXGEN_SS_UNMARSHAL or RXGEN_CC_UNMARSHAL if we see ENOMEM and EFAULT during received data delivery and abort with RX_CALL_DEAD in the default case. (2) Abort with RXGEN_SS_MARSHAL if we get ENOMEM whilst trying to send a reply. (3) Abort with RX_CALL_DEAD if we stop hearing from the peer if we had heard from the peer and abort with RX_CALL_TIMEOUT if we hadn't. (4) Abort with RX_CALL_DEAD if we try to disconnect a call that's not completed successfully or been aborted. Reported-by: Jeffrey Altman Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- fs/afs/rxrpc.c | 8 +++++--- net/rxrpc/call_event.c | 4 ++-- net/rxrpc/conn_object.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 23a1a92d64bb5..a5434f3e57c68 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -537,6 +537,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENODATA: case -EBADMSG: case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; @@ -544,7 +546,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, ret, "KUM"); goto local_abort; default: - abort_code = RX_USER_ABORT; + abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KER"); goto local_abort; @@ -836,7 +838,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); fallthrough; default: _leave(" [error]"); @@ -878,7 +880,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); } _leave(" [error]"); } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 22e05de5d1ca9..e426f6831aab4 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -377,9 +377,9 @@ recheck_state: if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET); + rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); } else { - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b2159dbf5412c..660cd9b1a4658 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: - chan->last_abort = RX_USER_ABORT; + chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } -- GitLab From dd07286924134bff9dcf85e59f02c63b197e5af1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:55 +0100 Subject: [PATCH 0488/4335] afs: Adjust ACK interpretation to try and cope with NAT [ Upstream commit adc9613ff66c26ebaff9814973181ac178beb90b ] If a client's address changes, say if it is NAT'd, this can disrupt an in progress operation. For most operations, this is not much of a problem, but StoreData can be different as some servers modify the target file as the data comes in, so if a store request is disrupted, the file can get corrupted on the server. The problem is that the server doesn't recognise packets that come after the change of address as belonging to the original client and will bounce them, either by sending an OUT_OF_SEQUENCE ACK to the apparent new call if the packet number falls within the initial sequence number window of a call or by sending an EXCEEDS_WINDOW ACK if it falls outside and then aborting it. In both cases, firstPacket will be 1 and previousPacket will be 0 in the ACK information. Fix this by the following means: (1) If a client call receives an EXCEEDS_WINDOW ACK with firstPacket as 1 and previousPacket as 0, assume this indicates that the server saw the incoming packets from a different peer and thus as a different call. Fail the call with error -ENETRESET. (2) Also fail the call if a similar OUT_OF_SEQUENCE ACK occurs if the first packet has been hard-ACK'd. If it hasn't been hard-ACK'd, the ACK packet will cause it to get retransmitted, so the call will just be repeated. (3) Make afs_select_fileserver() treat -ENETRESET as a straight fail of the operation. (4) Prioritise the error code over things like -ECONNRESET as the server did actually respond. (5) Make writeback treat -ENETRESET as a retryable error and make it redirty all the pages involved in a write so that the VM will retry. Note that there is still a circumstance that I can't easily deal with: if the operation is fully received and processed by the server, but the reply is lost due to address change. There's no way to know if the op happened. We can examine the server, but a conflicting change could have been made by a third party - and we can't tell the difference. In such a case, a message like: kAFS: vnode modified {100058:146266} b7->b8 YFS.StoreData64 (op=2646a) will be logged to dmesg on the next op to touch the file and the client will reset the inode state, including invalidating clean parts of the pagecache. Reported-by: Marc Dionne Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2021-December/004811.html # v1 Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- fs/afs/misc.c | 5 ++++- fs/afs/rotate.c | 4 ++++ fs/afs/write.c | 1 + net/rxrpc/input.c | 27 +++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 1d1a8debe4723..933e67fcdab1a 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -163,8 +163,11 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code) return; case -ECONNABORTED: + error = afs_abort_to_error(abort_code); + fallthrough; + case -ENETRESET: /* Responded, but we seem to have changed address */ e->responded = true; - e->error = afs_abort_to_error(abort_code); + e->error = error; return; } } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 79e1a5f6701be..a840c3588ebbb 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -292,6 +292,10 @@ bool afs_select_fileserver(struct afs_operation *op) op->error = error; goto iterate_address; + case -ENETRESET: + pr_warn("kAFS: Peer reset %s (op=%x)\n", + op->type ? op->type->name : "???", op->debug_id); + fallthrough; case -ECONNRESET: _debug("call reset"); op->error = error; diff --git a/fs/afs/write.c b/fs/afs/write.c index f24370f5c7744..a75c4742062aa 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -626,6 +626,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping, case -EKEYEXPIRED: case -EKEYREJECTED: case -EKEYREVOKED: + case -ENETRESET: afs_redirty_pages(wbc, mapping, start, len); mapping_set_error(mapping, ret); break; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dc201363f2c48..67d3eba60dc71 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -903,6 +903,33 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_respond_to_ack); } + /* If we get an EXCEEDS_WINDOW ACK from the server, it probably + * indicates that the client address changed due to NAT. The server + * lost the call because it switched to a different peer. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && + first_soft_ack == 1 && + prev_pkt == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + + /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also + * indicate a change of address. However, we can retransmit the call + * if we still have it buffered to the beginning. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && + first_soft_ack == 1 && + prev_pkt == 0 && + call->tx_hard_ack == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, -- GitLab From 4bcd926d787e3112ef4b2f8a573e9354c6c6dbb9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 May 2022 12:56:05 -0700 Subject: [PATCH 0489/4335] eth: tg3: silence the GCC 12 array-bounds warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9dec850fd7c210a04b4707df8e6c95bfafdd6a4b ] GCC 12 currently generates a rather inconsistent warning: drivers/net/ethernet/broadcom/tg3.c:17795:51: warning: array subscript 5 is above array bounds of ‘struct tg3_napi[5]’ [-Warray-bounds] 17795 | struct tg3_napi *tnapi = &tp->napi[i]; | ~~~~~~~~^~~ i is guaranteed < tp->irq_max which in turn is either 1 or 5. There are more loops like this one in the driver, but strangely GCC 12 dislikes only this single one. Silence this silliness for now. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index 0ddfb5b5d53ca..2e6c5f258a1ff 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -17,3 +17,8 @@ obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o obj-$(CONFIG_BNXT) += bnxt/ + +# FIXME: temporarily silence -Warray-bounds on non W=1+ builds +ifndef KBUILD_EXTRA_WARN +CFLAGS_tg3.o += -Wno-array-bounds +endif -- GitLab From f8bd7f369bb86d4d74d490aaa46c95e410697784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 26 Apr 2022 10:06:02 +0200 Subject: [PATCH 0490/4335] char: tpm: cr50_i2c: Suppress duplicated error message in .remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e0687fe958f763f1790f22ed5483025b7624e744 ] Returning an error value in an i2c remove callback results in an error message being emitted by the i2c core, but otherwise it doesn't make a difference. The device goes away anyhow and the devm cleanups are called. As tpm_cr50_i2c_remove() emits an error message already and the additional error message by the i2c core doesn't add any useful information, change the return value to zero to suppress this error message. Note that if i2c_clientdata is NULL, there is something really fishy. Assuming no memory corruption happened (then all bets are lost anyhow), tpm_cr50_i2c_remove() is only called after tpm_cr50_i2c_probe() returned successfully. So there was a tpm chip registered before and after tpm_cr50_i2c_remove() its privdata is freed but the associated character device isn't removed. If after that happened userspace accesses the character device it's likely that the freed memory is accessed. For that reason the warning message is made a bit more frightening. Signed-off-by: Uwe Kleine-König Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm_tis_i2c_cr50.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c index c892781037037..e2ab6a329732b 100644 --- a/drivers/char/tpm/tpm_tis_i2c_cr50.c +++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c @@ -754,8 +754,8 @@ static int tpm_cr50_i2c_remove(struct i2c_client *client) struct device *dev = &client->dev; if (!chip) { - dev_err(dev, "Could not get client data at remove\n"); - return -ENODEV; + dev_crit(dev, "Could not get client data at remove, memory corruption ahead\n"); + return 0; } tpm_chip_unregister(chip); -- GitLab From da6b6b4eab55853096134872592764dc88c4261a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 May 2022 08:20:44 -0700 Subject: [PATCH 0491/4335] selftests/bpf: fix btf_dump/btf_dump due to recent clang change [ Upstream commit 4050764cbaa25760aab40857f723393c07898474 ] Latest llvm-project upstream had a change of behavior related to qualifiers on function return type ([1]). This caused selftests btf_dump/btf_dump failure. The following example shows what changed. $ cat t.c typedef const char * const (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct t { int a; fn_ptr_arr2_t l; }; int foo(struct t *arg) { return arg->a; } Compiled with latest upstream llvm15, $ clang -O2 -g -target bpf -S -emit-llvm t.c The related generated debuginfo IR looks like: !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "fn_ptr_arr2_t", file: !1, line: 1, baseType: !17) !17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 320, elements: !32) !18 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !19) !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) !20 = !DISubroutineType(types: !21) !21 = !{!22, null} !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) !23 = !DISubroutineType(types: !24) !24 = !{!25, !28} !25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64) !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !27) !27 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) You can see two intermediate const qualifier to pointer are dropped in debuginfo IR. With llvm14, we have following debuginfo IR: !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "fn_ptr_arr2_t", file: !1, line: 1, baseType: !17) !17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 320, elements: !34) !18 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !19) !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) !20 = !DISubroutineType(types: !21) !21 = !{!22, null} !22 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !23) !23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !24, size: 64) !24 = !DISubroutineType(types: !25) !25 = !{!26, !30} !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !27) !27 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !28, size: 64) !28 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !29) !29 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) All const qualifiers are preserved. To adapt the selftest to both old and new llvm, this patch removed the intermediate const qualifier in const-to-ptr types, to make the test succeed again. [1] https://reviews.llvm.org/D125919 Reported-by: Mykola Lysenko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220523152044.3905809-1-yhs@fb.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 8aaa24a003220..970598dda7322 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct struct_w_typedefs { int_t a; -- GitLab From b897da780fd4ffa112ad191b73bf14b01f4479b4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 11 Feb 2022 10:50:08 -0500 Subject: [PATCH 0492/4335] gfs2: use i_lock spin_lock for inode qadata [ Upstream commit 5fcff61eea9efd1f4b60e89d2d686b5feaea100f ] Before this patch, functions gfs2_qa_get and _put used the i_rw_mutex to prevent simultaneous access to its i_qadata. But i_rw_mutex is now used for many other things, including iomap_begin and end, which causes a conflict according to lockdep. We cannot just remove the lock since simultaneous opens (gfs2_open -> gfs2_open_common -> gfs2_qa_get) can then stomp on each others values for i_qadata. This patch solves the conflict by using the i_lock spin_lock in the inode to prevent simultaneous access. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/quota.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index be0997e24d60b..dc77080a82bbf 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -531,34 +531,42 @@ static void qdsb_put(struct gfs2_quota_data *qd) */ int gfs2_qa_get(struct gfs2_inode *ip) { - int error = 0; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - down_write(&ip->i_rw_mutex); + spin_lock(&inode->i_lock); if (ip->i_qadata == NULL) { - ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); - if (!ip->i_qadata) { - error = -ENOMEM; - goto out; - } + struct gfs2_qadata *tmp; + + spin_unlock(&inode->i_lock); + tmp = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); + if (!tmp) + return -ENOMEM; + + spin_lock(&inode->i_lock); + if (ip->i_qadata == NULL) + ip->i_qadata = tmp; + else + kmem_cache_free(gfs2_qadata_cachep, tmp); } ip->i_qadata->qa_ref++; -out: - up_write(&ip->i_rw_mutex); - return error; + spin_unlock(&inode->i_lock); + return 0; } void gfs2_qa_put(struct gfs2_inode *ip) { - down_write(&ip->i_rw_mutex); + struct inode *inode = &ip->i_inode; + + spin_lock(&inode->i_lock); if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } - up_write(&ip->i_rw_mutex); + spin_unlock(&inode->i_lock); } int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) -- GitLab From ff876b6cf5785e9604cd407022db917734b5d7ad Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 17 May 2022 21:29:13 +0200 Subject: [PATCH 0493/4335] scsi: target: tcmu: Avoid holding XArray lock when calling lock_page [ Upstream commit 325d5c5fb216674296f3902a8902b942da3adc5b ] In tcmu_blocks_release(), lock_page() is called to prevent a race causing possible data corruption. Since lock_page() might sleep, calling it while holding XArray lock is a bug. To fix this, replace the xas_for_each() call with xa_for_each_range(). Since the latter does its own handling of XArray locking, the xas_lock() and xas_unlock() calls around the original loop are no longer necessary. The switch to xa_for_each_range() slows down the loop slightly. This is acceptable since tcmu_blocks_release() is not relevant for performance. Link: https://lore.kernel.org/r/20220517192913.21405-1-bostroesser@gmail.com Fixes: bb9b9eb0ae2e ("scsi: target: tcmu: Fix possible data corruption") Reported-by: Dan Carpenter Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_user.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0173f44b015a7..1e8e9dd3f482c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1661,13 +1661,14 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first, unsigned long last) { - XA_STATE(xas, &udev->data_pages, first * udev->data_pages_per_blk); struct page *page; + unsigned long dpi; u32 pages_freed = 0; - xas_lock(&xas); - xas_for_each(&xas, page, (last + 1) * udev->data_pages_per_blk - 1) { - xas_store(&xas, NULL); + first = first * udev->data_pages_per_blk; + last = (last + 1) * udev->data_pages_per_blk - 1; + xa_for_each_range(&udev->data_pages, dpi, page, first, last) { + xa_erase(&udev->data_pages, dpi); /* * While reaching here there may be page faults occurring on * the to-be-released pages. A race condition may occur if @@ -1691,7 +1692,6 @@ static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first, __free_page(page); pages_freed++; } - xas_unlock(&xas); atomic_sub(pages_freed, &global_page_count); -- GitLab From b6c6b398629d1fe266554b5c74986eb1db4b5362 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 28 Feb 2022 20:51:44 +0100 Subject: [PATCH 0494/4335] IB/rdmavt: add missing locks in rvt_ruc_loopback [ Upstream commit 22cbc6c2681a0a4fe76150270426e763d52353a4 ] The documentation of the function rvt_error_qp says both r_lock and s_lock need to be held when calling that function. It also asserts using lockdep that both of those locks are held. rvt_error_qp is called form rvt_send_cq, which is called from rvt_qp_complete_swqe, which is called from rvt_send_complete, which is called from rvt_ruc_loopback in two places. Both of these places do not hold r_lock. Fix this by acquiring a spin_lock of r_lock in both of these places. The r_lock acquiring cannot be added in rvt_qp_complete_swqe because some of its other callers already have r_lock acquired. Link: https://lore.kernel.org/r/20220228195144.71946-1-dossche.niels@gmail.com Signed-off-by: Niels Dossche Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 8ef112f883a77..3acab569fbb94 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -3134,7 +3134,9 @@ send_comp: rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3188,7 +3190,9 @@ serr: spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe; -- GitLab From 531156e1712f4e0b3140fbbb6d80c5bd0d50f29e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 21:29:59 +0200 Subject: [PATCH 0495/4335] ARM: dts: ox820: align interrupt controller node name with dtschema [ Upstream commit fbcd5ad7a419ad40644a0bb8b4152bc660172d8a ] Fixes dtbs_check warnings like: gic@1000: $nodename:0: 'gic@1000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Signed-off-by: Krzysztof Kozlowski Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20220317115705.450427-1-krzysztof.kozlowski@canonical.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ox820.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 90846a7655b49..dde4364892bf0 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -287,7 +287,7 @@ clocks = <&armclk>; }; - gic: gic@1000 { + gic: interrupt-controller@1000 { compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; -- GitLab From 5712f029726e81d5a48860a7ed55526a527aee4d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 21:30:22 +0200 Subject: [PATCH 0496/4335] ARM: dts: socfpga: align interrupt controller node name with dtschema [ Upstream commit c9bdd50d2019f78bf4c1f6a79254c27771901023 ] Fixes dtbs_check warnings like: $nodename:0: 'intc@fffed000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Signed-off-by: Krzysztof Kozlowski Acked-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220317115705.450427-2-krzysztof.kozlowski@canonical.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/socfpga.dtsi | 2 +- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 7c1d6423d7f8c..b8c5dd7860cb2 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -46,7 +46,7 @@ <0xff113000 0x1000>; }; - intc: intc@fffed000 { + intc: interrupt-controller@fffed000 { compatible = "arm,cortex-a9-gic"; #interrupt-cells = <3>; interrupt-controller; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 3ba431dfa8c94..f1e50d2e623a3 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -38,7 +38,7 @@ <0xff113000 0x1000>; }; - intc: intc@ffffd000 { + intc: interrupt-controller@ffffd000 { compatible = "arm,cortex-a9-gic"; #interrupt-cells = <3>; interrupt-controller; -- GitLab From fccd667c9c574cea38c3144434ad083ffb95a63e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Mar 2022 11:08:54 -0700 Subject: [PATCH 0497/4335] ARM: dts: s5pv210: align DMA channels with dtschema [ Upstream commit 9e916fb9bc3d16066286f19fc9c51d26a6aec6bd ] dtschema expects DMA channels in specific order (tx, rx and tx-sec). The order actually should not matter because dma-names is used however let's make it aligned with dtschema to suppress warnings like: i2s@eee30000: dma-names: ['rx', 'tx', 'tx-sec'] is not valid under any of the given schemas Signed-off-by: Krzysztof Kozlowski Co-developed-by: Jonathan Bakker Signed-off-by: Jonathan Bakker Link: https://lore.kernel.org/r/CY4PR04MB056779A9C50DC95987C5272ACB1C9@CY4PR04MB0567.namprd04.prod.outlook.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210-aries.dtsi | 2 +- arch/arm/boot/dts/s5pv210.dtsi | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index 2f57100a011a3..9b7da2bc3a06f 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -636,7 +636,7 @@ }; &i2s0 { - dmas = <&pdma0 9>, <&pdma0 10>, <&pdma0 11>; + dmas = <&pdma0 10>, <&pdma0 9>, <&pdma0 11>; status = "okay"; }; diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 353ba7b09a0c0..c5265f3ae31d6 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -239,8 +239,8 @@ reg = <0xeee30000 0x1000>; interrupt-parent = <&vic2>; interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + dma-names = "tx", "rx", "tx-sec"; + dmas = <&pdma1 10>, <&pdma1 9>, <&pdma1 11>; clock-names = "iis", "i2s_opclk0", "i2s_opclk1"; @@ -259,8 +259,8 @@ reg = <0xe2100000 0x1000>; interrupt-parent = <&vic2>; interrupts = <17>; - dma-names = "rx", "tx"; - dmas = <&pdma1 12>, <&pdma1 13>; + dma-names = "tx", "rx"; + dmas = <&pdma1 13>, <&pdma1 12>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S1>, <&clocks SCLK_AUDIO1>; pinctrl-names = "default"; @@ -274,8 +274,8 @@ reg = <0xe2a00000 0x1000>; interrupt-parent = <&vic2>; interrupts = <18>; - dma-names = "rx", "tx"; - dmas = <&pdma1 14>, <&pdma1 15>; + dma-names = "tx", "rx"; + dmas = <&pdma1 15>, <&pdma1 14>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S2>, <&clocks SCLK_AUDIO2>; pinctrl-names = "default"; -- GitLab From 8337956649ee1ab4d589fd3bf746762e5326452f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 19 Mar 2022 18:46:38 +0100 Subject: [PATCH 0498/4335] arm64: dts: qcom: msm8994: Fix the cont_splash_mem address [ Upstream commit 049c46f31a726bf8d202ff1681661513447fac84 ] The default memory map places cont_splash_mem at 3401000, which was overlooked.. Fix it! Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220319174645.340379-9-konrad.dybcio@somainline.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8994.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 5a9a5ed0565f6..1165269f059e6 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -183,8 +183,8 @@ no-map; }; - cont_splash_mem: memory@3800000 { - reg = <0 0x03800000 0 0x2400000>; + cont_splash_mem: memory@3401000 { + reg = <0 0x03401000 0 0x2200000>; no-map; }; -- GitLab From 5d44a053594cd2a1c88a2c6e688f15796155e7ca Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 19 Mar 2022 18:46:43 +0100 Subject: [PATCH 0499/4335] arm64: dts: qcom: msm8994: Fix BLSP[12]_DMA channels count [ Upstream commit 1ae438d26b620979ed004d559c304d31c42173ae ] MSM8994 actually features 24 DMA channels for each BLSP, fix it! Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220319174645.340379-14-konrad.dybcio@somainline.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8994.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 1165269f059e6..3c27671c8b5c3 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -498,7 +498,7 @@ #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; @@ -634,7 +634,7 @@ #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; -- GitLab From cb1be1d4be18fe286ba5a67d928598378fd7fbe5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 8 Mar 2022 11:08:59 -0800 Subject: [PATCH 0500/4335] PM / devfreq: rk3399_dmc: Disable edev on remove() [ Upstream commit 2fccf9e6050e0e3b8b4cd275d41daf7f7fa22804 ] Otherwise we hit an unablanced enable-count when unbinding the DFI device: [ 1279.659119] ------------[ cut here ]------------ [ 1279.659179] WARNING: CPU: 2 PID: 5638 at drivers/devfreq/devfreq-event.c:360 devfreq_event_remove_edev+0x84/0x8c ... [ 1279.659352] Hardware name: Google Kevin (DT) [ 1279.659363] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO BTYPE=--) [ 1279.659371] pc : devfreq_event_remove_edev+0x84/0x8c [ 1279.659380] lr : devm_devfreq_event_release+0x1c/0x28 ... [ 1279.659571] Call trace: [ 1279.659582] devfreq_event_remove_edev+0x84/0x8c [ 1279.659590] devm_devfreq_event_release+0x1c/0x28 [ 1279.659602] release_nodes+0x1cc/0x244 [ 1279.659611] devres_release_all+0x44/0x60 [ 1279.659621] device_release_driver_internal+0x11c/0x1ac [ 1279.659629] device_driver_detach+0x20/0x2c [ 1279.659641] unbind_store+0x7c/0xb0 [ 1279.659650] drv_attr_store+0x2c/0x40 [ 1279.659663] sysfs_kf_write+0x44/0x58 [ 1279.659672] kernfs_fop_write_iter+0xf4/0x190 [ 1279.659684] vfs_write+0x2b0/0x2e4 [ 1279.659693] ksys_write+0x80/0xec [ 1279.659701] __arm64_sys_write+0x24/0x30 [ 1279.659714] el0_svc_common+0xf0/0x1d8 [ 1279.659724] do_el0_svc_compat+0x28/0x3c [ 1279.659738] el0_svc_compat+0x10/0x1c [ 1279.659746] el0_sync_compat_handler+0xa8/0xcc [ 1279.659758] el0_sync_compat+0x188/0x1c0 [ 1279.659768] ---[ end trace cec200e5094155b4 ]--- Signed-off-by: Brian Norris Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/rk3399_dmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 293857ebfd75d..538e8dc74f40a 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -477,6 +477,8 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); + devfreq_event_disable_edev(dmcfreq->edev); + /* * Before remove the opp table we need to unregister the opp notifier. */ -- GitLab From eee44c7268421a9fd5bc33662d8751b4cc316301 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 6 Apr 2022 11:11:39 +0300 Subject: [PATCH 0501/4335] crypto: ccree - use fine grained DMA mapping dir [ Upstream commit a260436c98171cd825955a84a7f6e62bc8f4f00d ] Use a fine grained specification of DMA mapping directions in certain cases, allowing both a more optimized operation as well as shushing out a harmless, though persky dma-debug warning. Signed-off-by: Gilad Ben-Yossef Reported-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccree/cc_buffer_mgr.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 11e0278c8631d..6140e49273226 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx, req_ctx->mlli_params.mlli_dma_addr); } - dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); - if (src != dst) { - dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE); dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst)); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + } else { + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); } } @@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; int rc = 0; u32 mapped_nents = 0; + int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); req_ctx->dma_buf_type = CC_DMA_BUF_DLLI; mlli_params->curr_pool = NULL; @@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } /* Map the src SGL */ - rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents, + rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) goto cipher_exit; @@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } } else { /* Map the dst sg */ - rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE, &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) @@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); + int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); if (areq_ctx->mac_buf_dma_addr) { dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr, @@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE); } if (drvdata->coherent && areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT && @@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, else size_for_map -= authsize; - rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE, &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); @@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) size_to_map += authsize; } - rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->src, size_to_map, + (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL), &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), -- GitLab From 7cef9274fa1b8506949d74bc45aef072b890824a Mon Sep 17 00:00:00 2001 From: QintaoShen Date: Thu, 24 Mar 2022 15:44:03 +0800 Subject: [PATCH 0502/4335] soc: ti: ti_sci_pm_domains: Check for null return of devm_kcalloc [ Upstream commit ba56291e297d28aa6eb82c5c1964fae2d7594746 ] The allocation funciton devm_kcalloc may fail and return a null pointer, which would cause a null-pointer dereference later. It might be better to check it and directly return -ENOMEM just like the usage of devm_kcalloc in previous code. Signed-off-by: QintaoShen Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/1648107843-29077-1-git-send-email-unSimple1993@163.com Signed-off-by: Sasha Levin --- drivers/soc/ti/ti_sci_pm_domains.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c index 8afb3f45d2637..a33ec7eaf23d1 100644 --- a/drivers/soc/ti/ti_sci_pm_domains.c +++ b/drivers/soc/ti/ti_sci_pm_domains.c @@ -183,6 +183,8 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev) devm_kcalloc(dev, max_id + 1, sizeof(*pd_provider->data.domains), GFP_KERNEL); + if (!pd_provider->data.domains) + return -ENOMEM; pd_provider->data.num_domains = max_id + 1; pd_provider->data.xlate = ti_sci_pd_xlate; -- GitLab From 4b9380d92c66cdc66987f65130789abad5c1af6f Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Mon, 11 Apr 2022 18:45:34 +0800 Subject: [PATCH 0503/4335] fs: jfs: fix possible NULL pointer dereference in dbFree() [ Upstream commit 0d4837fdb796f99369cf7691d33de1b856bcaf1f ] In our fault-injection testing, the variable "nblocks" in dbFree() can be zero when kmalloc_array() fails in dtSearch(). In this case, the variable "mp" in dbFree() would be NULL and then it is dereferenced in "write_metapage(mp)". The failure log is listed as follows: [ 13.824137] BUG: kernel NULL pointer dereference, address: 0000000000000020 ... [ 13.827416] RIP: 0010:dbFree+0x5f7/0x910 [jfs] [ 13.834341] Call Trace: [ 13.834540] [ 13.834713] txFreeMap+0x7b4/0xb10 [jfs] [ 13.835038] txUpdateMap+0x311/0x650 [jfs] [ 13.835375] jfs_lazycommit+0x5f2/0xc70 [jfs] [ 13.835726] ? sched_dynamic_update+0x1b0/0x1b0 [ 13.836092] kthread+0x3c2/0x4a0 [ 13.836355] ? txLockFree+0x160/0x160 [jfs] [ 13.836763] ? kthread_unuse_mm+0x160/0x160 [ 13.837106] ret_from_fork+0x1f/0x30 [ 13.837402] ... This patch adds a NULL check of "mp" before "write_metapage(mp)" is called. Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index d8502f4989d9d..e75f31b81d634 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -385,7 +385,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) } /* write the last buffer. */ - write_metapage(mp); + if (mp) + write_metapage(mp); IREAD_UNLOCK(ipbmap); -- GitLab From 4b9185fbdbfb0c22db0425443a642405ec0c902b Mon Sep 17 00:00:00 2001 From: Joel Selvaraj Date: Wed, 30 Mar 2022 12:15:04 +0530 Subject: [PATCH 0504/4335] arm64: dts: qcom: sdm845-xiaomi-beryllium: fix typo in panel's vddio-supply property [ Upstream commit 1f1c494082a1f10d03ce4ee1485ee96d212e22ff ] vddio is misspelled with a "0" instead of "o". Fix it. Signed-off-by: Joel Selvaraj Reviewed-by: Caleb Connolly Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/BY5PR02MB7009901651E6A8D5ACB0425ED91F9@BY5PR02MB7009.namprd02.prod.outlook.com Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts index c60c8c640e17f..736951fabb7a9 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts @@ -221,7 +221,7 @@ panel@0 { compatible = "tianma,fhd-video"; reg = <0>; - vddi0-supply = <&vreg_l14a_1p8>; + vddio-supply = <&vreg_l14a_1p8>; vddpos-supply = <&lab>; vddneg-supply = <&ibb>; -- GitLab From 07ddf6fbfea1e6d247933d49443705c72e3e7c78 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2022 08:41:00 +0200 Subject: [PATCH 0505/4335] ALSA: usb-audio: Add quirk bits for enabling/disabling generic implicit fb [ Upstream commit 0f1f7a6661394fe4a53db254c346d6aa2dd64397 ] For making easier to test, add the new quirk_flags bits 17 and 18 to enable and disable the generic implicit feedback mode. The bit 17 is equivalent with implicit_fb=1 option, applying the generic implicit feedback sync mode. OTOH, the bit 18 disables the implicit fb mode forcibly. Link: https://lore.kernel.org/r/20220421064101.12456-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- Documentation/sound/alsa-configuration.rst | 4 +++- sound/usb/implicit.c | 5 ++++- sound/usb/usbaudio.h | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index 65f61695f5618..5d093fb4896b5 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -2237,7 +2237,7 @@ implicit_fb Apply the generic implicit feedback sync mode. When this is set and the playback stream sync mode is ASYNC, the driver tries to tie an adjacent ASYNC capture stream as the implicit feedback - source. + source. This is equivalent with quirk_flags bit 17. use_vmalloc Use vmalloc() for allocations of the PCM buffers (default: yes). For architectures with non-coherent memory like ARM or MIPS, the @@ -2279,6 +2279,8 @@ quirk_flags * bit 14: Ignore errors for mixer access * bit 15: Support generic DSD raw U32_BE format * bit 16: Set up the interface at first like UAC1 + * bit 17: Apply the generic implicit feedback sync mode + * bit 18: Don't apply implicit feedback sync mode This module supports multiple devices, autoprobe and hotplugging. diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 2d444ec742029..1fd087128538d 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -350,7 +350,8 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, } /* Try the generic implicit fb if available */ - if (chip->generic_implicit_fb) + if (chip->generic_implicit_fb || + (chip->quirk_flags & QUIRK_FLAG_GENERIC_IMPLICIT_FB)) return add_generic_implicit_fb(chip, fmt, alts); /* No quirk */ @@ -387,6 +388,8 @@ int snd_usb_parse_implicit_fb_quirk(struct snd_usb_audio *chip, struct audioformat *fmt, struct usb_host_interface *alts) { + if (chip->quirk_flags & QUIRK_FLAG_SKIP_IMPLICIT_FB) + return 0; if (fmt->endpoint & USB_DIR_IN) return audioformat_capture_quirk(chip, fmt, alts); else diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index b8359a0aa008a..044cd7ab27cbb 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -164,6 +164,10 @@ extern bool snd_usb_skip_validation; * Support generic DSD raw U32_BE format * QUIRK_FLAG_SET_IFACE_FIRST: * Set up the interface at first like UAC1 + * QUIRK_FLAG_GENERIC_IMPLICIT_FB + * Apply the generic implicit feedback sync mode (same as implicit_fb=1 option) + * QUIRK_FLAG_SKIP_IMPLICIT_FB + * Don't apply implicit feedback sync mode */ #define QUIRK_FLAG_GET_SAMPLE_RATE (1U << 0) @@ -183,5 +187,7 @@ extern bool snd_usb_skip_validation; #define QUIRK_FLAG_IGNORE_CTL_ERROR (1U << 14) #define QUIRK_FLAG_DSD_RAW (1U << 15) #define QUIRK_FLAG_SET_IFACE_FIRST (1U << 16) +#define QUIRK_FLAG_GENERIC_IMPLICIT_FB (1U << 17) +#define QUIRK_FLAG_SKIP_IMPLICIT_FB (1U << 18) #endif /* __USBAUDIO_H */ -- GitLab From 035ce4ffe0febed53c88229508cf1b57da6b0551 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2022 08:41:01 +0200 Subject: [PATCH 0506/4335] ALSA: usb-audio: Move generic implicit fb quirk entries into quirks.c [ Upstream commit 67d64069bc0867e52e73a1e255b17462005ca9b4 ] Use the new quirk bits to manage the generic implicit fb quirk entries. This makes easier to compare with other devices. Link: https://lore.kernel.org/r/20220421064101.12456-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/implicit.c | 5 ----- sound/usb/quirks.c | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 1fd087128538d..e1bf1b5da423c 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -45,11 +45,6 @@ struct snd_usb_implicit_fb_match { /* Implicit feedback quirk table for playback */ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { - /* Generic matching */ - IMPLICIT_FB_GENERIC_DEV(0x0499, 0x1509), /* Steinberg UR22 */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2030), /* M-Audio Fast Track C400 */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2031), /* M-Audio Fast Track C600 */ - /* Fixed EP */ /* FIXME: check the availability of generic matching */ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index fbbe59054c3fb..e8468f9b007d1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1793,6 +1793,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ @@ -1826,6 +1828,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0763, 0x2030, /* M-Audio Fast Track C400 */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ -- GitLab From de8f0b173d0751b089827fbacfef1d0b4d197014 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sun, 10 Apr 2022 15:07:54 +0200 Subject: [PATCH 0507/4335] ARM: OMAP1: clock: Fix UART rate reporting algorithm [ Upstream commit 338d5d476cde853dfd97378d20496baabc2ce3c0 ] Since its introduction to the mainline kernel, omap1_uart_recalc() helper makes incorrect use of clk->enable_bit as a ready to use bitmap mask while it only provides the bit number. Fix it. Signed-off-by: Janusz Krzysztofik Acked-by: Tony Lindgren Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-omap1/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 9d4a0ab50a468..d63d5eb8d8fdf 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock); unsigned long omap1_uart_recalc(struct clk *clk) { unsigned int val = __raw_readl(clk->enable_reg); - return val & clk->enable_bit ? 48000000 : 12000000; + return val & 1 << clk->enable_bit ? 48000000 : 12000000; } unsigned long omap1_sossi_recalc(struct clk *clk) -- GitLab From a0f7180a2c1cd4cc4430d7351c4de09b8616f3f7 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 21 Apr 2021 23:20:52 +0530 Subject: [PATCH 0508/4335] powerpc/fadump: Fix fadump to work with a different endian capture kernel [ Upstream commit b74196af372f7cb4902179009265fe63ac81824f ] Dump capture would fail if capture kernel is not of the endianess as the production kernel, because the in-memory data structure (struct opal_fadump_mem_struct) shared across production kernel and capture kernel assumes the same endianess for both the kernels, which doesn't have to be true always. Fix it by having a well-defined endianess for struct opal_fadump_mem_struct. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161902744901.86147.14719228311655123526.stgit@hbathini Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/opal-fadump.c | 94 +++++++++++--------- arch/powerpc/platforms/powernv/opal-fadump.h | 10 +-- 2 files changed, 57 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 9a360ced663b0..e23a51a05f99a 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. */ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. */ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5d..3f715efb0aa6e 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), -- GitLab From 50c292b469d4b5f7a6f1c8dadab4d78c136738f3 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 29 Apr 2022 14:38:02 -0700 Subject: [PATCH 0509/4335] fat: add ratelimit to fat*_ent_bread() [ Upstream commit 183c3237c928109d2008c0456dff508baf692b20 ] fat*_ent_bread() can be the cause of too many report on I/O error path. So use fat_msg_ratelimit() instead. Link: https://lkml.kernel.org/r/87bkxogfeq.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: qianfan Tested-by: qianfan Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/fat/fatent.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 978ac6751aeb7..1db348f8f887a 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -94,7 +94,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } @@ -107,8 +108,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", - (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; -- GitLab From c16b59d445135c8026a04e388d8b2762feaa3b3b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 29 Apr 2022 16:26:37 +0800 Subject: [PATCH 0510/4335] pinctrl: renesas: rzn1: Fix possible null-ptr-deref in sh_pfc_map_resources() [ Upstream commit 2f661477c2bb8068194dbba9738d05219f111c6e ] It will cause null-ptr-deref when using 'res', if platform_get_resource() returns NULL, so move using 'res' after devm_ioremap_resource() that will check it to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220429082637.1308182-2-yangyingliang@huawei.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/pinctrl-rzn1.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzn1.c b/drivers/pinctrl/renesas/pinctrl-rzn1.c index ef5fb25b6016d..849d091205d4d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzn1.c +++ b/drivers/pinctrl/renesas/pinctrl-rzn1.c @@ -865,17 +865,15 @@ static int rzn1_pinctrl_probe(struct platform_device *pdev) ipctl->mdio_func[0] = -1; ipctl->mdio_func[1] = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ipctl->lev1_protect_phys = (u32)res->start + 0x400; - ipctl->lev1 = devm_ioremap_resource(&pdev->dev, res); + ipctl->lev1 = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(ipctl->lev1)) return PTR_ERR(ipctl->lev1); + ipctl->lev1_protect_phys = (u32)res->start + 0x400; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - ipctl->lev2_protect_phys = (u32)res->start + 0x400; - ipctl->lev2 = devm_ioremap_resource(&pdev->dev, res); + ipctl->lev2 = devm_platform_get_and_ioremap_resource(pdev, 1, &res); if (IS_ERR(ipctl->lev2)) return PTR_ERR(ipctl->lev2); + ipctl->lev2_protect_phys = (u32)res->start + 0x400; ipctl->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(ipctl->clk)) -- GitLab From 83c329b980bddbc8c6a3d287d91f2103a4d4a860 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 29 Apr 2022 01:03:56 +0200 Subject: [PATCH 0511/4335] ARM: versatile: Add missing of_node_put in dcscb_init [ Upstream commit 23b44f9c649bbef10b45fa33080cd8b4166800ae ] The device_node pointer is returned by of_find_compatible_node with refcount incremented. We should use of_node_put() to avoid the refcount leak. Signed-off-by: Peng Wu Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220428230356.69418-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-vexpress/dcscb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index a0554d7d04f7c..e1adc098f89ac 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -144,6 +144,7 @@ static int __init dcscb_init(void) if (!node) return -ENODEV; dcscb_base = of_iomap(node, 0); + of_node_put(node); if (!dcscb_base) return -EADDRNOTAVAIL; cfg = readl_relaxed(dcscb_base + DCS_CFG_R); -- GitLab From 80b06c4afc14fc9f7db38dff3ff2a05ca738a58c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 26 Apr 2022 20:34:43 +0200 Subject: [PATCH 0512/4335] ARM: dts: exynos: add atmel,24c128 fallback to Samsung EEPROM [ Upstream commit f038e8186fbc5723d7d38c6fa1d342945107347e ] The Samsung s524ad0xd1 EEPROM should use atmel,24c128 fallback, according to the AT24 EEPROM bindings. Reported-by: Rob Herring Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220426183443.243113-1-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos5250-smdk5250.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index f042954bdfa5d..e4861415a0fe5 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -129,7 +129,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@50 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x50>; }; @@ -289,7 +289,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@51 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x51>; }; -- GitLab From a3265a9440030068547a20dfee646666f3ca5278 Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 28 Apr 2022 10:43:06 +0000 Subject: [PATCH 0513/4335] ARM: hisi: Add missing of_node_put after of_find_compatible_node [ Upstream commit 9bc72e47d4630d58a840a66a869c56b29554cfe4 ] of_find_compatible_node will increment the refcount of the returned device_node. Calling of_node_put() to avoid the refcount leak Signed-off-by: Peng Wu Signed-off-by: Wei Xu Signed-off-by: Sasha Levin --- arch/arm/mach-hisi/platsmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index a56cc64deeb8f..9ce93e0b6cdc3 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus) } ctrl_base = of_iomap(np, 0); if (!ctrl_base) { + of_node_put(np); pr_err("failed to map address\n"); return; } if (of_property_read_u32(np, "smp-offset", &offset) < 0) { + of_node_put(np); pr_err("failed to find smp-offset property\n"); return; } ctrl_base += offset; + of_node_put(np); } } @@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) if (WARN_ON(!node)) return -1; ctrl_base = of_iomap(node, 0); + of_node_put(node); /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); -- GitLab From 4668e18bc2993959a8f409ced4adf2fd295e0e81 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 4 May 2022 13:51:35 +0530 Subject: [PATCH 0514/4335] cpufreq: Avoid unnecessary frequency updates due to mismatch [ Upstream commit f55ae08c89873e140c7cac2a7fa161d31a0d60cf ] For some platforms, the frequency returned by hardware may be slightly different from what is provided in the frequency table. For example, hardware may return 499 MHz instead of 500 MHz. In such cases it is better to avoid getting into unnecessary frequency updates, as we may end up switching policy->cur between the two and sending unnecessary pre/post update notifications, etc. This patch has chosen allows the hardware frequency and table frequency to deviate by 1 MHz for now, we may want to increase it a bit later on if someone still complains. Reported-by: Rex-BC Chen Signed-off-by: Viresh Kumar Tested-by: Jia-wei Chang Reviewed-by: Matthias Brugger Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index eeac6d8092298..cddf7e13c2322 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -28,6 +28,7 @@ #include #include #include +#include #include static LIST_HEAD(cpufreq_policy_list); @@ -1701,6 +1702,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b return new_freq; if (policy->cur != new_freq) { + /* + * For some platforms, the frequency returned by hardware may be + * slightly different from what is provided in the frequency + * table, for example hardware may return 499 MHz instead of 500 + * MHz. In such cases it is better to avoid getting into + * unnecessary frequency updates. + */ + if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + return policy->cur; + cpufreq_out_of_sync(policy, new_freq); if (update) schedule_work(&policy->update); -- GitLab From 5ca40fcf0da0ce2b5bc44e7d8b036535955f2e3d Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 4 May 2022 12:12:44 +0200 Subject: [PATCH 0515/4335] powerpc/rtas: Keep MSR[RI] set when calling RTAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b6b1c3ce06ca438eb24e0f45bf0e63ecad0369f5 ] RTAS runs in real mode (MSR[DR] and MSR[IR] unset) and in 32-bit big endian mode (MSR[SF,LE] unset). The change in MSR is done in enter_rtas() in a relatively complex way, since the MSR value could be hardcoded. Furthermore, a panic has been reported when hitting the watchdog interrupt while running in RTAS, this leads to the following stack trace: watchdog: CPU 24 Hard LOCKUP watchdog: CPU 24 TB:997512652051031, last heartbeat TB:997504470175378 (15980ms ago) ... Supported: No, Unreleased kernel CPU: 24 PID: 87504 Comm: drmgr Kdump: loaded Tainted: G E X 5.14.21-150400.71.1.bz196362_2-default #1 SLE15-SP4 (unreleased) 0d821077ef4faa8dfaf370efb5fdca1fa35f4e2c NIP: 000000001fb41050 LR: 000000001fb4104c CTR: 0000000000000000 REGS: c00000000fc33d60 TRAP: 0100 Tainted: G E X (5.14.21-150400.71.1.bz196362_2-default) MSR: 8000000002981000 CR: 48800002 XER: 20040020 CFAR: 000000000000011c IRQMASK: 1 GPR00: 0000000000000003 ffffffffffffffff 0000000000000001 00000000000050dc GPR04: 000000001ffb6100 0000000000000020 0000000000000001 000000001fb09010 GPR08: 0000000020000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 80040000072a40a8 c00000000ff8b680 0000000000000007 0000000000000034 GPR16: 000000001fbf6e94 000000001fbf6d84 000000001fbd1db0 000000001fb3f008 GPR20: 000000001fb41018 ffffffffffffffff 000000000000017f fffffffffffff68f GPR24: 000000001fb18fe8 000000001fb3e000 000000001fb1adc0 000000001fb1cf40 GPR28: 000000001fb26000 000000001fb460f0 000000001fb17f18 000000001fb17000 NIP [000000001fb41050] 0x1fb41050 LR [000000001fb4104c] 0x1fb4104c Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX Oops: Unrecoverable System Reset, sig: 6 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries ... Supported: No, Unreleased kernel CPU: 24 PID: 87504 Comm: drmgr Kdump: loaded Tainted: G E X 5.14.21-150400.71.1.bz196362_2-default #1 SLE15-SP4 (unreleased) 0d821077ef4faa8dfaf370efb5fdca1fa35f4e2c NIP: 000000001fb41050 LR: 000000001fb4104c CTR: 0000000000000000 REGS: c00000000fc33d60 TRAP: 0100 Tainted: G E X (5.14.21-150400.71.1.bz196362_2-default) MSR: 8000000002981000 CR: 48800002 XER: 20040020 CFAR: 000000000000011c IRQMASK: 1 GPR00: 0000000000000003 ffffffffffffffff 0000000000000001 00000000000050dc GPR04: 000000001ffb6100 0000000000000020 0000000000000001 000000001fb09010 GPR08: 0000000020000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 80040000072a40a8 c00000000ff8b680 0000000000000007 0000000000000034 GPR16: 000000001fbf6e94 000000001fbf6d84 000000001fbd1db0 000000001fb3f008 GPR20: 000000001fb41018 ffffffffffffffff 000000000000017f fffffffffffff68f GPR24: 000000001fb18fe8 000000001fb3e000 000000001fb1adc0 000000001fb1cf40 GPR28: 000000001fb26000 000000001fb460f0 000000001fb17f18 000000001fb17000 NIP [000000001fb41050] 0x1fb41050 LR [000000001fb4104c] 0x1fb4104c Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX ---[ end trace 3ddec07f638c34a2 ]--- This happens because MSR[RI] is unset when entering RTAS but there is no valid reason to not set it here. RTAS is expected to be called with MSR[RI] as specified in PAPR+ section "7.2.1 Machine State": R1–7.2.1–9. If called with MSR[RI] equal to 1, then RTAS must protect its own critical regions from recursion by setting the MSR[RI] bit to 0 when in the critical regions. Fixing this by reviewing the way MSR is compute before calling RTAS. Now a hardcoded value meaning real mode, 32 bits big endian mode and Recoverable Interrupt is loaded. In the case MSR[S] is set, it will remain set while entering RTAS as only urfid can unset it (thanks Fabiano). In addition a check is added in do_enter_rtas() to detect calls made with MSR[RI] unset, as we are forcing it on later. This patch has been tested on the following machines: Power KVM Guest P8 S822L (host Ubuntu kernel 5.11.0-49-generic) PowerVM LPAR P8 9119-MME (FW860.A1) p9 9008-22L (FW950.00) P10 9080-HEX (FW1010.00) Suggested-by: Nicholas Piggin Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220504101244.12107-1-ldufour@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/entry_64.S | 24 ++++++++++++------------ arch/powerpc/kernel/rtas.c | 9 +++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 70cff7b49e172..07a1448146e27 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -330,22 +330,22 @@ _GLOBAL(enter_rtas) clrldi r4,r4,2 /* convert to realmode address */ mtlr r4 - li r0,0 - ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI - andc r0,r6,r0 - - li r9,1 - rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE - andc r6,r0,r9 - __enter_rtas: - sync /* disable interrupts so SRR0/1 */ - mtmsrd r0 /* don't get trashed */ - LOAD_REG_ADDR(r4, rtas) ld r5,RTASENTRY(r4) /* get the rtas->entry value */ ld r4,RTASBASE(r4) /* get the rtas->base value */ + + /* + * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we + * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in + * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] + * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if + * MSR[S] is set, it will remain when entering RTAS. + */ + LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + + li r0,0 + mtmsrd r0,1 /* disable RI before using SRR0/1 */ mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index e18a725a8e5d3..3f58140370357 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -49,6 +49,15 @@ void enter_rtas(unsigned long); static inline void do_enter_rtas(unsigned long args) { + unsigned long msr; + + /* + * Make sure MSR[RI] is currently enabled as it will be forced later + * in enter_rtas. + */ + msr = mfmsr(); + BUG_ON(!(msr & MSR_RI)); + enter_rtas(args); srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ -- GitLab From 2cdd5284035322795b0964f899eefba254cfe483 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 4 Apr 2022 14:25:39 +0800 Subject: [PATCH 0516/4335] PCI: Avoid pci_dev_lock() AB/BA deadlock with sriov_numvfs_store() [ Upstream commit a91ee0e9fca9d7501286cfbced9b30a33e52740a ] The sysfs sriov_numvfs_store() path acquires the device lock before the config space access lock: sriov_numvfs_store device_lock # A (1) acquire device lock sriov_configure vfio_pci_sriov_configure # (for example) vfio_pci_core_sriov_configure pci_disable_sriov sriov_disable pci_cfg_access_lock pci_wait_cfg # B (4) wait for dev->block_cfg_access == 0 Previously, pci_dev_lock() acquired the config space access lock before the device lock: pci_dev_lock pci_cfg_access_lock dev->block_cfg_access = 1 # B (2) set dev->block_cfg_access = 1 device_lock # A (3) wait for device lock Any path that uses pci_dev_lock(), e.g., pci_reset_function(), may deadlock with sriov_numvfs_store() if the operations occur in the sequence (1) (2) (3) (4). Avoid the deadlock by reversing the order in pci_dev_lock() so it acquires the device lock before the config space access lock, the same as the sriov_numvfs_store() path. [bhelgaas: combined and adapted commit log from Jay Zhou's independent subsequent posting: https://lore.kernel.org/r/20220404062539.1710-1-jianjay.zhou@huawei.com] Link: https://lore.kernel.org/linux-pci/1583489997-17156-1-git-send-email-yangyicong@hisilicon.com/ Also-posted-by: Jay Zhou Signed-off-by: Yicong Yang Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0380543d10fdc..09815cbc18f9a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5079,18 +5079,18 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe) static void pci_dev_lock(struct pci_dev *dev) { - pci_cfg_access_lock(dev); /* block PM suspend, driver probe, etc. */ device_lock(&dev->dev); + pci_cfg_access_lock(dev); } /* Return 1 on successful lock, 0 on contention */ int pci_dev_trylock(struct pci_dev *dev) { - if (pci_cfg_access_trylock(dev)) { - if (device_trylock(&dev->dev)) + if (device_trylock(&dev->dev)) { + if (pci_cfg_access_trylock(dev)) return 1; - pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } return 0; @@ -5099,8 +5099,8 @@ EXPORT_SYMBOL_GPL(pci_dev_trylock); void pci_dev_unlock(struct pci_dev *dev) { - device_unlock(&dev->dev); pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } EXPORT_SYMBOL_GPL(pci_dev_unlock); -- GitLab From 6cd8b30790d08bc40e7dfd80ffbaf6cf4bdd0d97 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 3 Mar 2022 15:33:15 +1000 Subject: [PATCH 0517/4335] KVM: PPC: Book3S HV Nested: L2 LPCR should inherit L1 LPES setting [ Upstream commit 2852ebfa10afdcefff35ec72c8da97141df9845c ] The L1 should not be able to adjust LPES mode for the L2. Setting LPES if the L0 needs it clear would cause external interrupts to be sent to L2 and missed by the L0. Clearing LPES when it may be set, as typically happens with XIVE enabled could cause a performance issue despite having no native XIVE support in the guest, because it will cause mediated interrupts for the L2 to be taken in HV mode, which then have to be injected. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220303053315.1056880-7-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_hv.c | 4 ++++ arch/powerpc/kvm/book3s_hv_nested.c | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7fa6857116690..eba77096c4430 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5235,6 +5235,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); lpcr &= LPCR_PECE | LPCR_LPES; } else { + /* + * The L2 LPES mode will be set by the L0 according to whether + * or not it needs to take external interrupts in HV mode. + */ lpcr = 0; } lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 6c4e0e93105ff..ddea14e5cb5e4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -261,8 +261,7 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu, /* * Don't let L1 change LPCR bits for the L2 except these: */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER; /* * Additional filtering is required depending on hardware -- GitLab From 72f58a176a021ac7802e816ca4bf82ac1f018896 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:01 -0700 Subject: [PATCH 0518/4335] alpha: fix alloc_zeroed_user_highpage_movable() [ Upstream commit f9c668d281aa20e38c9bda3b7b0adeb8891aa15e ] Due to a typo, the final argument to alloc_page_vma() didn't refer to a real variable. This only affected CONFIG_NUMA, which was marked BROKEN in 2006 and removed from alpha in 2021. Found due to a refactoring patch. Link: https://lkml.kernel.org/r/20220504182857.4013401-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reported-by: kernel test robot Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- arch/alpha/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 18f48a6f2ff6d..8f3f5eecba28b 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -18,7 +18,7 @@ extern void clear_page(void *page); #define clear_user_page(page, vaddr, pg) clear_page(page) #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr) + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE extern void copy_page(void * _to, void * _from); -- GitLab From 5eed36dc775eceae1cfecd7fbbac93d0fd812df3 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 11 May 2022 12:46:53 +0300 Subject: [PATCH 0519/4335] tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate [ Upstream commit 2b132903de7124dd9a758be0c27562e91a510848 ] Fixes following sparse warnings: CHECK mm/vmscan.c mm/vmscan.c: note: in included file (through include/trace/trace_events.h, include/trace/define_trace.h, include/trace/events/vmscan.h): ./include/trace/events/vmscan.h:281:1: sparse: warning: cast to restricted isolate_mode_t ./include/trace/events/vmscan.h:281:1: sparse: warning: restricted isolate_mode_t degrades to integer Link: https://lkml.kernel.org/r/e85d7ff2-fd10-53f8-c24e-ba0458439c1b@openvz.org Signed-off-by: Vasily Averin Acked-by: Steven Rostedt (Google) Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/trace/events/vmscan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 88faf2400ec25..b2eeeb0800126 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) - __field(isolate_mode_t, isolate_mode) + __field(unsigned int, isolate_mode) __field(int, lru) ), @@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; - __entry->isolate_mode = isolate_mode; + __entry->isolate_mode = (__force unsigned int)isolate_mode; __entry->lru = lru; ), -- GitLab From ce29ea35402dd407ea8fa56e3bee676df9bad3b2 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sat, 9 Apr 2022 01:44:16 -0700 Subject: [PATCH 0520/4335] powerpc/powernv/vas: Assign real address to rx_fifo in vas_rx_win_attr [ Upstream commit c127d130f6d59fa81701f6b04023cf7cd1972fb3 ] In init_winctx_regs(), __pa() is called on winctx->rx_fifo and this function is called to initialize registers for receive and fault windows. But the real address is passed in winctx->rx_fifo for receive windows and the virtual address for fault windows which causes errors with DEBUG_VIRTUAL enabled. Fixes this issue by assigning only real address to rx_fifo in vas_rx_win_attr struct for both receive and fault windows. Reported-by: Michael Ellerman Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/338e958c7ab8f3b266fa794a1f80f99b9671829e.camel@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/vas.h | 2 +- arch/powerpc/platforms/powernv/vas-fault.c | 2 +- arch/powerpc/platforms/powernv/vas-window.c | 4 ++-- arch/powerpc/platforms/powernv/vas.h | 2 +- drivers/crypto/nx/nx-common-powernv.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e091..56834a8a14654 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -112,7 +112,7 @@ static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref) * Receive window attributes specified by the (in-kernel) owner of window. */ struct vas_rx_win_attr { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index a7aabc18039eb..c1bfad56447d4 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -216,7 +216,7 @@ int vas_setup_fault_window(struct vas_instance *vinst) vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); attr.rx_fifo_size = vinst->fault_fifo_size; - attr.rx_fifo = vinst->fault_fifo; + attr.rx_fifo = __pa(vinst->fault_fifo); /* * Max creds is based on number of CRBs can fit in the FIFO. diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 0f8d39fbf2b21..0072682531d80 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -404,7 +404,7 @@ static void init_winctx_regs(struct pnv_vas_window *window, * * See also: Design note in function header. */ - val = __pa(winctx->rx_fifo); + val = winctx->rx_fifo; val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0); write_hvwc_reg(window, VREG(LFIFO_BAR), val); @@ -739,7 +739,7 @@ static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, */ winctx->fifo_disable = true; winctx->intr_disable = true; - winctx->rx_fifo = NULL; + winctx->rx_fifo = 0; } winctx->lnotify_lpid = rxattr->lnotify_lpid; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 8bb08e395de05..08d9d3d5a22b0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -376,7 +376,7 @@ struct pnv_vas_window { * is a container for the register fields in the window context. */ struct vas_winctx { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; int rsvd_txbuf_count; diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 32a036ada5d0a..f418817c0f43e 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -827,7 +827,7 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, goto err_out; vas_init_rx_win_attr(&rxattr, coproc->ct); - rxattr.rx_fifo = (void *)rx_fifo; + rxattr.rx_fifo = rx_fifo; rxattr.rx_fifo_size = fifo_size; rxattr.lnotify_lpid = lpid; rxattr.lnotify_pid = pid; -- GitLab From df802880a7f9cd96b921b00639b00871f18a9a57 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Sat, 2 Apr 2022 01:34:19 +0000 Subject: [PATCH 0521/4335] powerpc/xics: fix refcount leak in icp_opal_init() [ Upstream commit 5dd9e27ea4a39f7edd4bf81e9e70208e7ac0b7c9 ] The of_find_compatible_node() function returns a node pointer with refcount incremented, use of_node_put() on it when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220402013419.2410298-1-lv.ruyi@zte.com.cn Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/xics/icp-opal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 675d708863d57..db0452e7c3515 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -196,6 +196,7 @@ int icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } -- GitLab From a4a6a3826943f58c119c35317e1e5aa6146772b2 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Thu, 7 Apr 2022 09:00:43 +0000 Subject: [PATCH 0522/4335] powerpc/powernv: fix missing of_node_put in uv_init() [ Upstream commit 3ffa9fd471f57f365bc54fc87824c530422f64a5 ] of_find_compatible_node() returns node pointer with refcount incremented, use of_node_put() on it when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220407090043.2491854-1-lv.ruyi@zte.com.cn Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/ultravisor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d3..67c8c4b2d8b17 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; -- GitLab From 787255e6190a9bc171dbab642344dfb37b0e5ad8 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 7 Apr 2022 20:11:32 +1000 Subject: [PATCH 0523/4335] macintosh/via-pmu: Fix build failure when CONFIG_INPUT is disabled [ Upstream commit 86ce436e30d86327c9f5260f718104ae7b21f506 ] drivers/macintosh/via-pmu-event.o: In function `via_pmu_event': via-pmu-event.c:(.text+0x44): undefined reference to `input_event' via-pmu-event.c:(.text+0x68): undefined reference to `input_event' via-pmu-event.c:(.text+0x94): undefined reference to `input_event' via-pmu-event.c:(.text+0xb8): undefined reference to `input_event' drivers/macintosh/via-pmu-event.o: In function `via_pmu_event_init': via-pmu-event.c:(.init.text+0x20): undefined reference to `input_allocate_device' via-pmu-event.c:(.init.text+0xc4): undefined reference to `input_register_device' via-pmu-event.c:(.init.text+0xd4): undefined reference to `input_free_device' make[1]: *** [Makefile:1155: vmlinux] Error 1 make: *** [Makefile:350: __build_one_by_one] Error 2 Don't call into the input subsystem unless CONFIG_INPUT is built-in. Reported-by: kernel test robot Signed-off-by: Finn Thain Tested-by: Randy Dunlap Reviewed-by: Christophe Leroy Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5edbe76ce68227f71e09af4614cc4c1bd61c7ec8.1649326292.git.fthain@linux-m68k.org Signed-off-by: Sasha Levin --- drivers/macintosh/Kconfig | 4 ++++ drivers/macintosh/Makefile | 3 ++- drivers/macintosh/via-pmu.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 5cdc361da37cb..3942db15a2b8d 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -67,6 +67,10 @@ config ADB_PMU this device; you should do so if your machine is one of those mentioned above. +config ADB_PMU_EVENT + def_bool y + depends on ADB_PMU && INPUT=y + config ADB_PMU_LED bool "Support for the Power/iBook front LED" depends on PPC_PMAC && ADB_PMU diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index 49819b1b6f201..712edcb3e0b08 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_MAC_EMUMOUSEBTN) += mac_hid.o obj-$(CONFIG_INPUT_ADBHID) += adbhid.o obj-$(CONFIG_ANSLCD) += ans-lcd.o -obj-$(CONFIG_ADB_PMU) += via-pmu.o via-pmu-event.o +obj-$(CONFIG_ADB_PMU) += via-pmu.o +obj-$(CONFIG_ADB_PMU_EVENT) += via-pmu-event.o obj-$(CONFIG_ADB_PMU_LED) += via-pmu-led.o obj-$(CONFIG_PMAC_BACKLIGHT) += via-pmu-backlight.o obj-$(CONFIG_ADB_CUDA) += via-cuda.o diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 4b98bc26a94b5..2109129ea1bbf 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1459,7 +1459,7 @@ next: pmu_pass_intr(data, len); /* len == 6 is probably a bad check. But how do I * know what PMU versions send what events here? */ - if (len == 6) { + if (IS_ENABLED(CONFIG_ADB_PMU_EVENT) && len == 6) { via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } -- GitLab From df6d8b689252c0acc0448d4ae3d33f2d6db048ab Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Mon, 25 Apr 2022 08:12:45 +0000 Subject: [PATCH 0524/4335] powerpc/iommu: Add missing of_node_put in iommu_init_early_dart [ Upstream commit 57b742a5b8945118022973e6416b71351df512fb ] The device_node pointer is returned by of_find_compatible_node with refcount incremented. We should use of_node_put() to avoid the refcount leak. Signed-off-by: Peng Wu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220425081245.21705-1-wupeng58@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/dart_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 1d33b7a5ea832..dc774b204c061 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -404,9 +404,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. However, that only works for devices connected @@ -419,6 +420,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM -- GitLab From 9e5b03ca021430edeed4077c1865a903c4073b26 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 23 May 2022 20:42:03 -0500 Subject: [PATCH 0525/4335] smb3: check for null tcon [ Upstream commit bbdf6cf56c88845fb0b713cbf5c6623c53fe40d8 ] Although unlikely to be null, it is confusing to use a pointer before checking for it to be null so move the use down after null check. Addresses-Coverity: 1517586 ("Null pointer dereferences (REVERSE_INULL)") Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index df9ba3729d1f8..775296e4d3c87 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -745,8 +745,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct cached_fid **cfid) { - struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = ses->server; + struct cifs_ses *ses; + struct TCP_Server_Info *server; struct cifs_open_parms oparms; struct smb2_create_rsp *o_rsp = NULL; struct smb2_query_info_rsp *qi_rsp = NULL; @@ -764,6 +764,9 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (tcon->nohandlecache) return -ENOTSUPP; + ses = tcon->ses; + server = ses->server; + if (cifs_sb->root == NULL) return -ENOENT; -- GitLab From 32e6aea33944f364d51cd263e4cd236393a188b6 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Fri, 20 May 2022 14:37:06 -0400 Subject: [PATCH 0526/4335] RDMA/hfi1: Prevent panic when SDMA is disabled [ Upstream commit 629e052d0c98e46dde9f0824f0aa437f678d9b8f ] If the hfi1 module is loaded with HFI1_CAP_SDMA off, a call to hfi1_write_iter() will dereference a NULL pointer and panic. A typical stack frame is: sdma_select_user_engine [hfi1] hfi1_user_sdma_process_request [hfi1] hfi1_write_iter [hfi1] do_iter_readv_writev do_iter_write vfs_writev do_writev do_syscall_64 The fix is to test for SDMA in hfi1_write_iter() and fail the I/O with EINVAL. Link: https://lore.kernel.org/r/20220520183706.48973.79803.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Douglas Miller Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/file_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1783a6ea5427b..3ebdd42fec362 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long dim = from->nr_segs; int idx; + if (!HFI1_CAP_IS_KSET(SDMA)) + return -EINVAL; idx = srcu_read_lock(&fd->pq_srcu); pq = srcu_dereference(fd->pq, &fd->pq_srcu); if (!cq || !pq) { -- GitLab From 96c460687813915dedca9dd7d04ae0e90607fd79 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 25 May 2022 14:39:28 -0700 Subject: [PATCH 0527/4335] Input: gpio-keys - cancel delayed work only in case of GPIO [ Upstream commit cee409bbba0d1bd3fb73064fb480ff365f453b5d ] gpio_keys module can either accept gpios or interrupts. The module initializes delayed work in case of gpios only and is only used if debounce timer is not used, so make sure cancel_delayed_work_sync() is called only when its gpio-backed and debounce_use_hrtimer is false. This fixes the issue seen below when the gpio_keys module is unloaded and an interrupt pin is used instead of GPIO: [ 360.297569] ------------[ cut here ]------------ [ 360.302303] WARNING: CPU: 0 PID: 237 at kernel/workqueue.c:3066 __flush_work+0x414/0x470 [ 360.310531] Modules linked in: gpio_keys(-) [ 360.314797] CPU: 0 PID: 237 Comm: rmmod Not tainted 5.18.0-rc5-arm64-renesas-00116-g73636105874d-dirty #166 [ 360.324662] Hardware name: Renesas SMARC EVK based on r9a07g054l2 (DT) [ 360.331270] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 360.338318] pc : __flush_work+0x414/0x470 [ 360.342385] lr : __cancel_work_timer+0x140/0x1b0 [ 360.347065] sp : ffff80000a7fba00 [ 360.350423] x29: ffff80000a7fba00 x28: ffff000012b9c5c0 x27: 0000000000000000 [ 360.357664] x26: ffff80000a7fbb80 x25: ffff80000954d0a8 x24: 0000000000000001 [ 360.364904] x23: ffff800009757000 x22: 0000000000000000 x21: ffff80000919b000 [ 360.372143] x20: ffff00000f5974e0 x19: ffff00000f5974e0 x18: ffff8000097fcf48 [ 360.379382] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000053f40 [ 360.386622] x14: ffff800009850e88 x13: 0000000000000002 x12: 000000000000a60c [ 360.393861] x11: 000000000000a610 x10: 0000000000000000 x9 : 0000000000000008 [ 360.401100] x8 : 0101010101010101 x7 : 00000000a473c394 x6 : 0080808080808080 [ 360.408339] x5 : 0000000000000001 x4 : 0000000000000000 x3 : ffff80000919b458 [ 360.415578] x2 : ffff8000097577f0 x1 : 0000000000000001 x0 : 0000000000000000 [ 360.422818] Call trace: [ 360.425299] __flush_work+0x414/0x470 [ 360.429012] __cancel_work_timer+0x140/0x1b0 [ 360.433340] cancel_delayed_work_sync+0x10/0x18 [ 360.437931] gpio_keys_quiesce_key+0x28/0x58 [gpio_keys] [ 360.443327] devm_action_release+0x10/0x18 [ 360.447481] release_nodes+0x8c/0x1a0 [ 360.451194] devres_release_all+0x90/0x100 [ 360.455346] device_unbind_cleanup+0x14/0x60 [ 360.459677] device_release_driver_internal+0xe8/0x168 [ 360.464883] driver_detach+0x4c/0x90 [ 360.468509] bus_remove_driver+0x54/0xb0 [ 360.472485] driver_unregister+0x2c/0x58 [ 360.476462] platform_driver_unregister+0x10/0x18 [ 360.481230] gpio_keys_exit+0x14/0x828 [gpio_keys] [ 360.486088] __arm64_sys_delete_module+0x1e0/0x270 [ 360.490945] invoke_syscall+0x40/0xf8 [ 360.494661] el0_svc_common.constprop.3+0xf0/0x110 [ 360.499515] do_el0_svc+0x20/0x78 [ 360.502877] el0_svc+0x48/0xf8 [ 360.505977] el0t_64_sync_handler+0x88/0xb0 [ 360.510216] el0t_64_sync+0x148/0x14c [ 360.513930] irq event stamp: 4306 [ 360.517288] hardirqs last enabled at (4305): [] __cancel_work_timer+0x130/0x1b0 [ 360.526359] hardirqs last disabled at (4306): [] el1_dbg+0x24/0x88 [ 360.534204] softirqs last enabled at (4278): [] _stext+0x4a0/0x5e0 [ 360.542133] softirqs last disabled at (4267): [] irq_exit_rcu+0x18c/0x1b0 [ 360.550591] ---[ end trace 0000000000000000 ]--- Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20220524135822.14764-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/gpio_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 8dbf1e69c90ac..22a91db645b8f 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -131,7 +131,7 @@ static void gpio_keys_quiesce_key(void *data) if (!bdata->gpiod) hrtimer_cancel(&bdata->release_timer); - if (bdata->debounce_use_hrtimer) + else if (bdata->debounce_use_hrtimer) hrtimer_cancel(&bdata->debounce_timer); else cancel_delayed_work_sync(&bdata->work); -- GitLab From 710051ebb77b785d637417564be7fed4c13c6ce0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 28 May 2022 11:08:48 -0700 Subject: [PATCH 0528/4335] drm: fix EDID struct for old ARM OABI format [ Upstream commit 47f15561b69e226bfc034e94ff6dbec51a4662af ] When building the kernel for arm with the "-mabi=apcs-gnu" option, gcc will force alignment of all structures and unions to a word boundary (see also STRUCTURE_SIZE_BOUNDARY and the "-mstructure-size-boundary=XX" option if you're a gcc person), even when the members of said structures do not want or need said alignment. This completely messes up the structure alignment of 'struct edid' on those targets, because even though all the embedded structures are marked with "__attribute__((packed))", the unions that contain them are not. This was exposed by commit f1e4c916f97f ("drm/edid: add EDID block count and size helpers"), but the bug is pre-existing. That commit just made the structure layout problem cause a build failure due to the addition of the BUILD_BUG_ON(sizeof(*edid) != EDID_LENGTH); sanity check in drivers/gpu/drm/drm_edid.c:edid_block_data(). This legacy union alignment should probably not be used in the first place, but we can fix the layout by adding the packed attribute to the union entries even when each member is already packed and it shouldn't matter in a sane build environment. You can see this issue with a trivial test program: union { struct { char c[5]; }; struct { char d; unsigned e; } __attribute__((packed)); } a = { "1234" }; where building this with a normal "gcc -S" will result in the expected 5-byte size of said union: .type a, @object .size a, 5 but with an ARM compiler and the old ABI: arm-linux-gnu-gcc -mabi=apcs-gnu -mfloat-abi=soft -S t.c you get .type a, %object .size a, 8 instead, because even though each member of the union is packed, the union itself still gets aligned. This was reported by Sudip for the spear3xx_defconfig target. Link: https://lore.kernel.org/lkml/YpCUzStDnSgQLNFN@debian/ Reported-by: Sudip Mukherjee Acked-by: Arnd Bergmann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/drm/drm_edid.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index deccfd39e6db2..c24559f5329dd 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -121,7 +121,7 @@ struct detailed_data_monitor_range { u8 supported_scalings; u8 preferred_refresh; } __attribute__((packed)) cvt; - } formula; + } __attribute__((packed)) formula; } __attribute__((packed)); struct detailed_data_wpindex { @@ -154,7 +154,7 @@ struct detailed_non_pixel { struct detailed_data_wpindex color; struct std_timing timings[6]; struct cvt_timing cvt[4]; - } data; + } __attribute__((packed)) data; } __attribute__((packed)); #define EDID_DETAIL_EST_TIMINGS 0xf7 @@ -172,7 +172,7 @@ struct detailed_timing { union { struct detailed_pixel_timing pixel_data; struct detailed_non_pixel other_data; - } data; + } __attribute__((packed)) data; } __attribute__((packed)); #define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0) -- GitLab From 4399781c49b255ea7a5351459ac55f6b6b1f8614 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Sat, 25 Dec 2021 09:31:51 +0300 Subject: [PATCH 0529/4335] drm/bridge_connector: enable HPD by default if supported [ Upstream commit 09077bc3116581f4d1cb961ec359ad56586e370b ] Hotplug events reported by bridge drivers over drm_bridge_hpd_notify() get ignored unless somebody calls drm_bridge_hpd_enable(). When the connector for the bridge is bridge_connector, such a call is done from drm_bridge_connector_enable_hpd(). However drm_bridge_connector_enable_hpd() is never called on init paths, documentation suggests that it is intended for suspend/resume paths. In result, once encoders are switched to bridge_connector, bridge-detected HPD stops working. This patch adds a call to that API on init path. This fixes HDMI HPD with rcar-du + adv7513 case when adv7513 reports HPD events via interrupts. Fixes: c24110a8fd09 ("drm: rcar-du: Use drm_bridge_connector_init() helper") Signed-off-by: Nikita Yushchenko Signed-off-by: Paul Cercueil Tested-by: Kieran Bingham Reviewed-by: Kieran Bingham Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20211225063151.2110878-1-nikita.yoush@cogentembedded.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_bridge_connector.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 791379816837d..4f20137ef21d5 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -369,8 +369,10 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, connector_type, ddc); drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); - if (bridge_connector->bridge_hpd) + if (bridge_connector->bridge_hpd) { connector->polled = DRM_CONNECTOR_POLL_HPD; + drm_bridge_connector_enable_hpd(connector); + } else if (bridge_connector->bridge_detect) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; -- GitLab From 97c8a858346491720e771f64e3575b2cda01d523 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 24 Nov 2021 16:07:52 +0100 Subject: [PATCH 0530/4335] dt-bindings: display: sitronix, st7735r: Fix backlight in example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 471e201f543559e2cb19b182b680ebf04d80ee31 ] The backlight property was lost during conversion to yaml in commit abdd9e3705c8 ("dt-bindings: display: sitronix,st7735r: Convert to DT schema"). Put it back. Fixes: abdd9e3705c8 ("dt-bindings: display: sitronix,st7735r: Convert to DT schema") Signed-off-by: Noralf Trønnes Acked-by: Rob Herring Reviewed-by: Geert Uytterhoeven Acked-by: David Lechner Link: https://patchwork.freedesktop.org/patch/msgid/20211124150757.17929-2-noralf@tronnes.org Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/display/sitronix,st7735r.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 0cebaaefda032..419c3b2ac5a6f 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -72,6 +72,7 @@ examples: dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; rotation = <270>; + backlight = <&backlight>; }; }; -- GitLab From 2525e264ce5b73d4532e22e199026655063866d8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 18 Mar 2022 13:43:28 -0400 Subject: [PATCH 0531/4335] drm/vmwgfx: Fix an invalid read [ Upstream commit 10a26e0d5fc3574f63ce8a6cf28381b126317f40 ] vmw_move assumed that buffers to be moved would always be vmw_buffer_object's but after introduction of new placement for mob pages that's no longer the case. The resulting invalid read didn't have any practical consequences because the memory isn't used unless the object actually is a vmw_buffer_object. Fix it by moving the cast to the spot where the results are used. Signed-off-by: Zack Rusin Fixes: f6be23264bba ("drm/vmwgfx: Introduce a new placement for MOB page tables") Reported-by: Chuck Lever III Reviewed-by: Martin Krastev Tested-by: Chuck Lever Link: https://patchwork.freedesktop.org/patch/msgid/20220318174332.440068-2-zack@kde.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 8d1e869cc1964..34ab08369e043 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -862,22 +862,21 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, struct ttm_device *bdev = bo->bdev; struct vmw_private *dev_priv; - dev_priv = container_of(bdev, struct vmw_private, bdev); mutex_lock(&dev_priv->binding_mutex); - dx_query_mob = container_of(bo, struct vmw_buffer_object, base); - if (!dx_query_mob || !dx_query_mob->dx_query_ctx) { - mutex_unlock(&dev_priv->binding_mutex); - return; - } - /* If BO is being moved from MOB to system memory */ if (new_mem->mem_type == TTM_PL_SYSTEM && old_mem->mem_type == VMW_PL_MOB) { struct vmw_fence_obj *fence; + dx_query_mob = container_of(bo, struct vmw_buffer_object, base); + if (!dx_query_mob || !dx_query_mob->dx_query_ctx) { + mutex_unlock(&dev_priv->binding_mutex); + return; + } + (void) vmw_query_readback_all(dx_query_mob); mutex_unlock(&dev_priv->binding_mutex); @@ -891,7 +890,6 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, (void) ttm_bo_wait(bo, false, false); } else mutex_unlock(&dev_priv->binding_mutex); - } /** -- GitLab From 384b9eeb9f2950a7928f321cd2168073cee8d873 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 21 Mar 2022 12:58:23 +0200 Subject: [PATCH 0532/4335] ath11k: acquire ab->base_lock in unassign when finding the peer by addr [ Upstream commit 2db80f93869d491be57cbc2b36f30d0d3a0e5bde ] ath11k_peer_find_by_addr states via lockdep that ab->base_lock must be held when calling that function in order to protect the list. All callers except ath11k_mac_op_unassign_vif_chanctx have that lock acquired when calling ath11k_peer_find_by_addr. That lock is also not transitively held by a path towards ath11k_mac_op_unassign_vif_chanctx. The solution is to acquire the lock when calling ath11k_peer_find_by_addr inside ath11k_mac_op_unassign_vif_chanctx. I am currently working on a static analyser to detect missing locks and this was a reported case. I manually verified the report by looking at the code, but I do not have real hardware so this is compile tested only. Fixes: 701e48a43e15 ("ath11k: add packet log support for QCA6390") Signed-off-by: Niels Dossche Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220314215253.92658-1-dossche.niels@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mac.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 07004564a3ec5..bf64ab6e8484b 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5592,6 +5592,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; struct ath11k_vif *arvif = (void *)vif->drv_priv; + struct ath11k_peer *peer; int ret; mutex_lock(&ar->conf_mutex); @@ -5603,9 +5604,13 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, WARN_ON(!arvif->is_started); if (ab->hw_params.vdev_start_delay && - arvif->vdev_type == WMI_VDEV_TYPE_MONITOR && - ath11k_peer_find_by_addr(ab, ar->mac_addr)) - ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr); + arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { + spin_lock_bh(&ab->base_lock); + peer = ath11k_peer_find_by_addr(ab, ar->mac_addr); + spin_unlock_bh(&ab->base_lock); + if (peer) + ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr); + } ret = ath11k_mac_vdev_stop(arvif); if (ret) -- GitLab From b0bc3af3f09d781dc621797ed36e3a8275406649 Mon Sep 17 00:00:00 2001 From: Nicolas Belin Date: Wed, 16 Mar 2022 14:57:32 +0100 Subject: [PATCH 0533/4335] drm: bridge: it66121: Fix the register page length [ Upstream commit 003a1bd6a2a55c16cb2451153533dbedb12bebec ] Set the register page length or window length to 0x100 according to the documentation. Fixes: 988156dc2fc9 ("drm: bridge: add it66121 driver") Signed-off-by: Nicolas Belin Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20220316135733.173950-3-nbelin@baylibre.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/ite-it66121.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 06b59b422c696..64912b770086f 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -227,7 +227,7 @@ static const struct regmap_range_cfg it66121_regmap_banks[] = { .selector_mask = 0x1, .selector_shift = 0, .window_start = 0x00, - .window_len = 0x130, + .window_len = 0x100, }, }; -- GitLab From 996409ee9716f1bfc8973930d1a3370804e5a855 Mon Sep 17 00:00:00 2001 From: Wenli Looi Date: Sun, 20 Mar 2022 17:30:08 -0600 Subject: [PATCH 0534/4335] ath9k: fix ar9003_get_eepmisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9aaff3864b603408c02c629957ae8d8ff5d5a4f2 ] The current implementation is reading the wrong eeprom type. Fixes: d8ec2e2a63e8 ("ath9k: Add an eeprom_ops callback for retrieving the eepmisc value") Signed-off-by: Wenli Looi Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220320233010.123106-5-wlooi@ucalgary.ca Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index b0a4ca3559fd8..abed1effd95ca 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -5615,7 +5615,7 @@ unsigned int ar9003_get_paprd_scale_factor(struct ath_hw *ah, static u8 ar9003_get_eepmisc(struct ath_hw *ah) { - return ah->eeprom.map4k.baseEepHeader.eepMisc; + return ah->eeprom.ar9300_eep.baseEepHeader.opCapFlags.eepMisc; } const struct eeprom_ops eep_ar9300_ops = { -- GitLab From 96db9afa4958bbaee77995eb0c84e08cdcce74ae Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 30 Mar 2022 20:04:26 +0300 Subject: [PATCH 0535/4335] drm/edid: fix invalid EDID extension block filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3aefc722ff52076407203b6af9713de567993adf ] The invalid EDID block filtering uses the number of valid EDID extensions instead of all EDID extensions for looping the extensions in the copy. This is fine, by coincidence, if all the invalid blocks are at the end of the EDID. However, it's completely broken if there are invalid extensions in the middle; the invalid blocks are included and valid blocks are excluded. Fix it by modifying the base block after, not before, the copy. Fixes: 14544d0937bf ("drm/edid: Only print the bad edid when aborting") Reported-by: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220330170426.349248-1-jani.nikula@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_edid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ee6f44f9a81c3..6ab048ba8021c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1994,9 +1994,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, connector_bad_edid(connector, edid, edid[0x7e] + 1); - edid[EDID_LENGTH-1] += edid[0x7e] - valid_extensions; - edid[0x7e] = valid_extensions; - new = kmalloc_array(valid_extensions + 1, EDID_LENGTH, GFP_KERNEL); if (!new) @@ -2013,6 +2010,9 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, base += EDID_LENGTH; } + new[EDID_LENGTH - 1] += new[0x7e] - valid_extensions; + new[0x7e] = valid_extensions; + kfree(edid); edid = new; } -- GitLab From faeb95b4adaab94468a57f9ef74f25bafcc8a427 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 21 Mar 2022 11:47:05 +0100 Subject: [PATCH 0536/4335] drm/bridge: adv7511: clean up CEC adapter when probe fails [ Upstream commit 7ed2b0dabf7a22874cb30f8878df239ef638eb53 ] When the probe routine fails we also need to clean up the CEC adapter registered in adv7511_cec_init(). Fixes: 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support") Signed-off-by: Lucas Stach Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220321104705.2804423-1-l.stach@pengutronix.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index c02f3ec60b04c..8c2025584f1b4 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1306,6 +1306,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return 0; err_unregister_cec: + cec_unregister_adapter(adv7511->cec_adap); i2c_unregister_device(adv7511->i2c_cec); clk_disable_unprepare(adv7511->cec_clk); err_i2c_unregister_packet: -- GitLab From 495f08380e84a8469d0579a4cef0999290398472 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 31 Mar 2022 17:04:59 +0200 Subject: [PATCH 0537/4335] drm: bridge: icn6211: Fix register layout [ Upstream commit 2dcec57b3734029cc1adc5cb872f61e21609eed4 ] The chip register layout has nothing to do with MIPI DCS, the registers incorrectly marked as MIPI DCS in the driver are regular chip registers often with completely different function. Fill in the actual register names and bits from [1] and [2] and add the entire register layout, since the documentation for this chip is hard to come by. [1] https://github.com/rockchip-linux/kernel/blob/develop-4.19/drivers/gpu/drm/bridge/icn6211.c [2] https://github.com/tdjastrzebski/ICN6211-Configurator Acked-by: Maxime Ripard Fixes: ce517f18944e3 ("drm: bridge: Add Chipone ICN6211 MIPI-DSI to RGB bridge") Signed-off-by: Marek Vasut Cc: Jagan Teki Cc: Maxime Ripard Cc: Robert Foss Cc: Sam Ravnborg Cc: Thomas Zimmermann To: dri-devel@lists.freedesktop.org Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220331150509.9838-2-marex@denx.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/chipone-icn6211.c | 134 ++++++++++++++++++++--- 1 file changed, 117 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index a6151db955868..eb26615b2993a 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -14,8 +14,19 @@ #include #include -#include