From 874f2a7d412ef7a30179349e9dc480f0c2429289 Mon Sep 17 00:00:00 2001 From: Houston Yaroschoff Date: Mon, 11 Jun 2018 12:39:09 +0200 Subject: [PATCH 0001/1291] usb: cdc_acm: Add quirk for Uniden UBC125 scanner commit 4a762569a2722b8a48066c7bacf0e1dc67d17fa1 upstream. Uniden UBC125 radio scanner has USB interface which fails to work with cdc_acm driver: usb 1-1.5: new full-speed USB device number 4 using xhci_hcd cdc_acm 1-1.5:1.0: Zero length descriptor references cdc_acm: probe of 1-1.5:1.0 failed with error -22 Adding the NO_UNION_NORMAL quirk for the device fixes the issue: usb 1-4: new full-speed USB device number 15 using xhci_hcd usb 1-4: New USB device found, idVendor=1965, idProduct=0018 usb 1-4: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-4: Product: UBC125XLT usb 1-4: Manufacturer: Uniden Corp. usb 1-4: SerialNumber: 0001 cdc_acm 1-4:1.0: ttyACM0: USB ACM device `lsusb -v` of the device: Bus 001 Device 015: ID 1965:0018 Uniden Corporation Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x1965 Uniden Corporation idProduct 0x0018 bcdDevice 0.01 iManufacturer 1 Uniden Corp. iProduct 2 UBC125XLT iSerial 3 0001 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 48 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 0 None iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0008 1x 8 bytes bInterval 10 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Device Status: 0x0000 (Bus Powered) Signed-off-by: Houston Yaroschoff Cc: stable Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 22952d70b981..3b9aadd007f5 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ .driver_info = SINGLE_RX_URB, }, + { USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */ + .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, -- GitLab From 15e449969537594f970c166bbe811889f2ec853b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 18 Jun 2018 10:24:03 +0200 Subject: [PATCH 0002/1291] USB: serial: cp210x: add CESINEL device ids commit 24160628a34af962ac99f2f58e547ac3c4cbd26f upstream. Add device ids for CESINEL products. Reported-by: Carlos Barcala Lara Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index d0f00274d16c..31b8e303ca38 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -98,6 +98,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ + { USB_DEVICE(0x10C4, 0x817C) }, /* CESINEL MEDCAL N Power Quality Monitor */ + { USB_DEVICE(0x10C4, 0x817D) }, /* CESINEL MEDCAL NT Power Quality Monitor */ + { USB_DEVICE(0x10C4, 0x817E) }, /* CESINEL MEDCAL S Power Quality Monitor */ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */ @@ -115,6 +118,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */ + { USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */ + { USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */ { USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ @@ -127,7 +133,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x851E) }, /* CESINEL MEDCAL PT Network Analyzer */ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ + { USB_DEVICE(0x10C4, 0x85B8) }, /* CESINEL ReCon T Energy Logger */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ @@ -137,10 +145,13 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */ + { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */ { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ + { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ -- GitLab From e80add5223dda08db1e5453a0ed4bc1175f394e9 Mon Sep 17 00:00:00 2001 From: Karoly Pados Date: Sat, 9 Jun 2018 13:26:08 +0200 Subject: [PATCH 0003/1291] USB: serial: cp210x: add Silicon Labs IDs for Windows Update commit 2f839823382748664b643daa73f41ee0cc01ced6 upstream. Silicon Labs defines alternative VID/PID pairs for some chips that when used will automatically install drivers for Windows users without manual intervention. Unfortunately, these IDs are not recognized by the Linux module, so using these IDs improves user experience on one platform but degrades it on Linux. This patch addresses this problem. Signed-off-by: Karoly Pados Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 31b8e303ca38..142a83e5974c 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -157,8 +157,11 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ + { USB_DEVICE(0x10C4, 0xEA63) }, /* Silicon Labs Windows Update (CP2101-4/CP2102N) */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ + { USB_DEVICE(0x10C4, 0xEA7A) }, /* Silicon Labs Windows Update (CP2105) */ + { USB_DEVICE(0x10C4, 0xEA7B) }, /* Silicon Labs Windows Update (CP2108) */ { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ -- GitLab From 447294efb995181076c2c34e9c6bee1703c74a9a Mon Sep 17 00:00:00 2001 From: William Wu Date: Mon, 21 May 2018 18:12:00 +0800 Subject: [PATCH 0004/1291] usb: dwc2: fix the incorrect bitmaps for the ports of multi_tt hub commit 8760675932ddb614e83702117d36ea644050c609 upstream. The dwc2_get_ls_map() use ttport to reference into the bitmap if we're on a multi_tt hub. But the bitmaps index from 0 to (hub->maxchild - 1), while the ttport index from 1 to hub->maxchild. This will cause invalid memory access when the number of ttport is hub->maxchild. Without this patch, I can easily meet a Kernel panic issue if connect a low-speed USB mouse with the max port of FE2.1 multi-tt hub (1a40:0201) on rk3288 platform. Fixes: 9f9f09b048f5 ("usb: dwc2: host: Totally redo the microframe scheduler") Cc: Reviewed-by: Douglas Anderson Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 3ae8b1bbaa55..7f51a77bc5cc 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -379,7 +379,7 @@ static unsigned long *dwc2_get_ls_map(struct dwc2_hsotg *hsotg, /* Get the map and adjust if this is a multi_tt hub */ map = qh->dwc_tt->periodic_bitmaps; if (qh->dwc_tt->usb_tt->multi) - map += DWC2_ELEMENTS_PER_LS_BITMAP * qh->ttport; + map += DWC2_ELEMENTS_PER_LS_BITMAP * (qh->ttport - 1); return map; } -- GitLab From f2e9a38558d8bbd670357922c3d06b845c8d92df Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:17 +0300 Subject: [PATCH 0005/1291] acpi: Add helper for deactivating memory region commit d2d2e3c46be5d6dd8001d0eebdf7cafb9bc7006b upstream. Sometimes memory resource may be overlapping with SystemMemory Operation Region by design, for example if the memory region is used as a mailbox for communication with a firmware in the system. One occasion of such mailboxes is USB Type-C Connector System Software Interface (UCSI). With regions like that, it is important that the driver is able to map the memory with the requirements it has. For example, the driver should be allowed to map the memory as non-cached memory. However, if the operation region has been accessed before the driver has mapped the memory, the memory has been marked as write-back by the time the driver is loaded. That means the driver will fail to map the memory if it expects non-cached memory. To work around the problem, introducing helper that the drivers can use to temporarily deactivate (unmap) SystemMemory Operation Regions that overlap with their IO memory. Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Reviewed-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 3 ++ 2 files changed, 75 insertions(+) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index db78d353bab1..191e86c62037 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -45,6 +45,8 @@ #include #include +#include "acpica/accommon.h" +#include "acpica/acnamesp.h" #include "internal.h" #define _COMPONENT ACPI_OS_SERVICES @@ -1477,6 +1479,76 @@ int acpi_check_region(resource_size_t start, resource_size_t n, } EXPORT_SYMBOL(acpi_check_region); +static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level, + void *_res, void **return_value) +{ + struct acpi_mem_space_context **mem_ctx; + union acpi_operand_object *handler_obj; + union acpi_operand_object *region_obj2; + union acpi_operand_object *region_obj; + struct resource *res = _res; + acpi_status status; + + region_obj = acpi_ns_get_attached_object(handle); + if (!region_obj) + return AE_OK; + + handler_obj = region_obj->region.handler; + if (!handler_obj) + return AE_OK; + + if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + return AE_OK; + + if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) + return AE_OK; + + region_obj2 = acpi_ns_get_secondary_object(region_obj); + if (!region_obj2) + return AE_OK; + + mem_ctx = (void *)®ion_obj2->extra.region_context; + + if (!(mem_ctx[0]->address >= res->start && + mem_ctx[0]->address < res->end)) + return AE_OK; + + status = handler_obj->address_space.setup(region_obj, + ACPI_REGION_DEACTIVATE, + NULL, (void **)mem_ctx); + if (ACPI_SUCCESS(status)) + region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE); + + return status; +} + +/** + * acpi_release_memory - Release any mappings done to a memory region + * @handle: Handle to namespace node + * @res: Memory resource + * @level: A level that terminates the search + * + * Walks through @handle and unmaps all SystemMemory Operation Regions that + * overlap with @res and that have already been activated (mapped). + * + * This is a helper that allows drivers to place special requirements on memory + * region that may overlap with operation regions, primarily allowing them to + * safely map the region as non-cached memory. + * + * The unmapped Operation Regions will be automatically remapped next time they + * are called, so the drivers do not need to do anything else. + */ +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level) +{ + if (!(res->flags & IORESOURCE_MEM)) + return AE_TYPE; + + return acpi_walk_namespace(ACPI_TYPE_REGION, handle, level, + acpi_deactivate_mem_region, NULL, res, NULL); +} +EXPORT_SYMBOL_GPL(acpi_release_memory); + /* * Let drivers know whether the resource checks are effective */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 502af53ec012..13c105121a18 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -441,6 +441,9 @@ int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level); + int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -- GitLab From 47adbb26373f2e89f8ef76524489a250e4608b04 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:18 +0300 Subject: [PATCH 0006/1291] usb: typec: ucsi: acpi: Workaround for cache mode issue commit 1f9f9d168ce619608572b01771c47a41b15429e6 upstream. This fixes an issue where the driver fails with an error: ioremap error for 0x3f799000-0x3f79a000, requested 0x2, got 0x0 On some platforms the UCSI ACPI mailbox SystemMemory Operation Region may be setup before the driver has been loaded. That will lead into the driver failing to map the mailbox region, as it has been already marked as write-back memory. acpi_os_ioremap() for x86 uses ioremap_cache() unconditionally. When the issue happens, the embedded controller has a pending query event for the UCSI notification right after boot-up which causes the operation region to be setup before UCSI driver has been loaded. The fix is to notify acpi core that the driver is about to access memory region which potentially overlaps with an operation region right before mapping it. acpi_release_memory() will check if the memory has already been setup (mapped) by acpi core, and deactivate it (unmap) if it has. The driver is then able to map the memory with ioremap_nocache() and set the memtype to uncached for the region. Reported-by: Paul Menzel Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index cabd47612b0a..494d2a49203a 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -82,6 +82,11 @@ static int ucsi_acpi_probe(struct platform_device *pdev) return -ENODEV; } + /* This will make sure we can use ioremap_nocache() */ + status = acpi_release_memory(ACPI_HANDLE(&pdev->dev), res, 1); + if (ACPI_FAILURE(status)) + return -ENOMEM; + /* * NOTE: The memory region for the data structures is used also in an * operation region, which means ACPI has already reserved it. Therefore -- GitLab From 0a7db82ed9ccbf6a85f4269a454e06e438b41b99 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:19 +0300 Subject: [PATCH 0007/1291] usb: typec: ucsi: Fix for incorrect status data issue commit 68816e16b4789f2d05e77b6dcb77564cf5d6a8d8 upstream. According to UCSI Specification, Connector Change Event only means a change in the Connector Status and Operation Mode fields of the STATUS data structure. So any other change should create another event. Unfortunately on some platforms the firmware acting as PPM (platform policy manager - usually embedded controller firmware) still does not report any other status changes if there is a connector change event. So if the connector power or data role was changed when a device was plugged to the connector, the driver does not get any indication about that. The port will show wrong roles if that happens. To fix the issue, always checking the data and power role together with a connector change event. Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Signed-off-by: Heikki Krogerus Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dd24c5c1534d..251f5d66651e 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -346,6 +346,19 @@ static void ucsi_connector_change(struct work_struct *work) } if (con->status.change & UCSI_CONSTAT_CONNECT_CHANGE) { + typec_set_pwr_role(con->port, con->status.pwr_dir); + + switch (con->status.partner_type) { + case UCSI_CONSTAT_PARTNER_TYPE_UFP: + typec_set_data_role(con->port, TYPEC_HOST); + break; + case UCSI_CONSTAT_PARTNER_TYPE_DFP: + typec_set_data_role(con->port, TYPEC_DEVICE); + break; + default: + break; + } + if (con->status.connected) ucsi_register_partner(con); else -- GitLab From 716382f1c1eaca706ab694a61215b5e3416a0ea6 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 21 Jun 2018 16:19:42 +0300 Subject: [PATCH 0008/1291] xhci: Fix kernel oops in trace_xhci_free_virt_device commit d850c1658328e757635a46763783c6fd56390dcb upstream. commit 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") set dev->udev pointer to NULL in xhci_free_dev(), it will cause kernel panic in trace_xhci_free_virt_device. This patch reimplement the trace function trace_xhci_free_virt_device, remove dev->udev dereference and added more useful parameters to show in the trace function,it also makes sure dev->udev is not NULL before calling trace_xhci_free_virt_device. This issue happened when xhci-hcd trace is enabled and USB devices hot plug test. Original use-after-free patch went to stable so this needs so be applied there as well. [ 1092.022457] usb 2-4: USB disconnect, device number 6 [ 1092.092772] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 1092.101694] PGD 0 P4D 0 [ 1092.104601] Oops: 0000 [#1] SMP [ 1092.207734] Workqueue: usb_hub_wq hub_event [ 1092.212507] RIP: 0010:trace_event_raw_event_xhci_log_virt_dev+0x6c/0xf0 [ 1092.220050] RSP: 0018:ffff8c252e883d28 EFLAGS: 00010086 [ 1092.226024] RAX: ffff8c24af86fa84 RBX: 0000000000000003 RCX: ffff8c25255c2a01 [ 1092.234130] RDX: 0000000000000000 RSI: 00000000aef55009 RDI: ffff8c252e883d28 [ 1092.242242] RBP: ffff8c252550e2c0 R08: ffff8c24af86fa84 R09: 0000000000000a70 [ 1092.250364] R10: 0000000000000a70 R11: 0000000000000000 R12: ffff8c251f21a000 [ 1092.258468] R13: 000000000000000c R14: ffff8c251f21a000 R15: ffff8c251f432f60 [ 1092.266572] FS: 0000000000000000(0000) GS:ffff8c252e880000(0000) knlGS:0000000000000000 [ 1092.275757] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1092.282281] CR2: 0000000000000000 CR3: 0000000154209001 CR4: 00000000003606e0 [ 1092.290384] Call Trace: [ 1092.293156] [ 1092.295439] xhci_free_virt_device.part.34+0x182/0x1a0 [ 1092.301288] handle_cmd_completion+0x7ac/0xfa0 [ 1092.306336] ? trace_event_raw_event_xhci_log_trb+0x6e/0xa0 [ 1092.312661] xhci_irq+0x3e8/0x1f60 [ 1092.316524] __handle_irq_event_percpu+0x75/0x180 [ 1092.321876] handle_irq_event_percpu+0x20/0x50 [ 1092.326922] handle_irq_event+0x36/0x60 [ 1092.331273] handle_edge_irq+0x6d/0x180 [ 1092.335644] handle_irq+0x16/0x20 [ 1092.339417] do_IRQ+0x41/0xc0 [ 1092.342782] common_interrupt+0xf/0xf [ 1092.346955] Fixes: 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") Cc: Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++-- drivers/usb/host/xhci-trace.h | 36 ++++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index efd7e4882d66..00b710016d21 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -891,12 +891,12 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) dev = xhci->devs[slot_id]; - trace_xhci_free_virt_device(dev); - xhci->dcbaa->dev_context_ptrs[slot_id] = 0; if (!dev) return; + trace_xhci_free_virt_device(dev); + if (dev->tt_info) old_active_eps = dev->tt_info->active_eps; diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index f20753b99624..02a1164ca599 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -158,6 +158,37 @@ DEFINE_EVENT(xhci_log_trb, xhci_queue_trb, TP_ARGS(ring, trb) ); +DECLARE_EVENT_CLASS(xhci_log_free_virt_dev, + TP_PROTO(struct xhci_virt_device *vdev), + TP_ARGS(vdev), + TP_STRUCT__entry( + __field(void *, vdev) + __field(unsigned long long, out_ctx) + __field(unsigned long long, in_ctx) + __field(u8, fake_port) + __field(u8, real_port) + __field(u16, current_mel) + + ), + TP_fast_assign( + __entry->vdev = vdev; + __entry->in_ctx = (unsigned long long) vdev->in_ctx->dma; + __entry->out_ctx = (unsigned long long) vdev->out_ctx->dma; + __entry->fake_port = (u8) vdev->fake_port; + __entry->real_port = (u8) vdev->real_port; + __entry->current_mel = (u16) vdev->current_mel; + ), + TP_printk("vdev %p ctx %llx | %llx fake_port %d real_port %d current_mel %d", + __entry->vdev, __entry->in_ctx, __entry->out_ctx, + __entry->fake_port, __entry->real_port, __entry->current_mel + ) +); + +DEFINE_EVENT(xhci_log_free_virt_dev, xhci_free_virt_device, + TP_PROTO(struct xhci_virt_device *vdev), + TP_ARGS(vdev) +); + DECLARE_EVENT_CLASS(xhci_log_virt_dev, TP_PROTO(struct xhci_virt_device *vdev), TP_ARGS(vdev), @@ -195,11 +226,6 @@ DEFINE_EVENT(xhci_log_virt_dev, xhci_alloc_virt_device, TP_ARGS(vdev) ); -DEFINE_EVENT(xhci_log_virt_dev, xhci_free_virt_device, - TP_PROTO(struct xhci_virt_device *vdev), - TP_ARGS(vdev) -); - DEFINE_EVENT(xhci_log_virt_dev, xhci_setup_device, TP_PROTO(struct xhci_virt_device *vdev), TP_ARGS(vdev) -- GitLab From d105fb8c88940765d9555ec921ea2e1267286628 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 May 2018 09:53:13 +0900 Subject: [PATCH 0009/1291] n_tty: Fix stall at n_tty_receive_char_special(). commit 3d63b7e4ae0dc5e02d28ddd2fa1f945defc68d81 upstream. syzbot is reporting stalls at n_tty_receive_char_special() [1]. This is because comparison is not working as expected since ldata->read_head can change at any moment. Mitigate this by explicitly masking with buffer size when checking condition for "while" loops. [1] https://syzkaller.appspot.com/bug?id=3d7481a346958d9469bebbeb0537d5f056bdd6e8 Signed-off-by: Tetsuo Handa Reported-by: syzbot Fixes: bc5a5e3f45d04784 ("n_tty: Don't wrap input buffer indices at buffer size") Cc: stable Cc: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 1c70541a1467..a8a7a13c8683 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -126,6 +126,8 @@ struct n_tty_data { struct mutex output_lock; }; +#define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1)) + static inline size_t read_cnt(struct n_tty_data *ldata) { return ldata->read_head - ldata->read_tail; @@ -980,14 +982,15 @@ static void eraser(unsigned char c, struct tty_struct *tty) } seen_alnums = 0; - while (ldata->read_head != ldata->canon_head) { + while (MASK(ldata->read_head) != MASK(ldata->canon_head)) { head = ldata->read_head; /* erase a single possibly multibyte character */ do { head--; c = read_buf(ldata, head); - } while (is_continuation(c, tty) && head != ldata->canon_head); + } while (is_continuation(c, tty) && + MASK(head) != MASK(ldata->canon_head)); /* do not partially erase */ if (is_continuation(c, tty)) @@ -1029,7 +1032,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) * This info is used to go back the correct * number of columns. */ - while (tail != ldata->canon_head) { + while (MASK(tail) != MASK(ldata->canon_head)) { tail--; c = read_buf(ldata, tail); if (c == '\t') { @@ -1304,7 +1307,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) finish_erasing(ldata); echo_char(c, tty); echo_char_raw('\n', ldata); - while (tail != ldata->read_head) { + while (MASK(tail) != MASK(ldata->read_head)) { echo_char(read_buf(ldata, tail), tty); tail++; } @@ -2413,7 +2416,7 @@ static unsigned long inq_canon(struct n_tty_data *ldata) tail = ldata->read_tail; nr = head - tail; /* Skip EOF-chars.. */ - while (head != tail) { + while (MASK(head) != MASK(tail)) { if (test_bit(tail & (N_TTY_BUF_SIZE - 1), ldata->read_flags) && read_buf(ldata, tail) == __DISABLED_CHAR) nr--; -- GitLab From c034d161fa63f35e0107b9c03d4b3108d2401a08 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 May 2018 09:53:14 +0900 Subject: [PATCH 0010/1291] n_tty: Access echo_* variables carefully. commit ebec3f8f5271139df618ebdf8427e24ba102ba94 upstream. syzbot is reporting stalls at __process_echoes() [1]. This is because since ldata->echo_commit < ldata->echo_tail becomes true for some reason, the discard loop is serving as almost infinite loop. This patch tries to avoid falling into ldata->echo_commit < ldata->echo_tail situation by making access to echo_* variables more carefully. Since reset_buffer_flags() is called without output_lock held, it should not touch echo_* variables. And omit a call to reset_buffer_flags() from n_tty_open() by using vzalloc(). Since add_echo_byte() is called without output_lock held, it needs memory barrier between storing into echo_buf[] and incrementing echo_head counter. echo_buf() needs corresponding memory barrier before reading echo_buf[]. Lack of handling the possibility of not-yet-stored multi-byte operation might be the reason of falling into ldata->echo_commit < ldata->echo_tail situation, for if I do WARN_ON(ldata->echo_commit == tail + 1) prior to echo_buf(ldata, tail + 1), the WARN_ON() fires. Also, explicitly masking with buffer for the former "while" loop, and use ldata->echo_commit > tail for the latter "while" loop. [1] https://syzkaller.appspot.com/bug?id=17f23b094cd80df750e5b0f8982c521ee6bcbf40 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Peter Hurley Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index a8a7a13c8683..0475f9685a41 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -145,6 +145,7 @@ static inline unsigned char *read_buf_addr(struct n_tty_data *ldata, size_t i) static inline unsigned char echo_buf(struct n_tty_data *ldata, size_t i) { + smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). */ return ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)]; } @@ -320,9 +321,7 @@ static inline void put_tty_queue(unsigned char c, struct n_tty_data *ldata) static void reset_buffer_flags(struct n_tty_data *ldata) { ldata->read_head = ldata->canon_head = ldata->read_tail = 0; - ldata->echo_head = ldata->echo_tail = ldata->echo_commit = 0; ldata->commit_head = 0; - ldata->echo_mark = 0; ldata->line_start = 0; ldata->erasing = 0; @@ -621,12 +620,19 @@ static size_t __process_echoes(struct tty_struct *tty) old_space = space = tty_write_room(tty); tail = ldata->echo_tail; - while (ldata->echo_commit != tail) { + while (MASK(ldata->echo_commit) != MASK(tail)) { c = echo_buf(ldata, tail); if (c == ECHO_OP_START) { unsigned char op; int no_space_left = 0; + /* + * Since add_echo_byte() is called without holding + * output_lock, we might see only portion of multi-byte + * operation. + */ + if (MASK(ldata->echo_commit) == MASK(tail + 1)) + goto not_yet_stored; /* * If the buffer byte is the start of a multi-byte * operation, get the next byte, which is either the @@ -638,6 +644,8 @@ static size_t __process_echoes(struct tty_struct *tty) unsigned int num_chars, num_bs; case ECHO_OP_ERASE_TAB: + if (MASK(ldata->echo_commit) == MASK(tail + 2)) + goto not_yet_stored; num_chars = echo_buf(ldata, tail + 2); /* @@ -732,7 +740,8 @@ static size_t __process_echoes(struct tty_struct *tty) /* If the echo buffer is nearly full (so that the possibility exists * of echo overrun before the next commit), then discard enough * data at the tail to prevent a subsequent overrun */ - while (ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) { + while (ldata->echo_commit > tail && + ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) { if (echo_buf(ldata, tail) == ECHO_OP_START) { if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB) tail += 3; @@ -742,6 +751,7 @@ static size_t __process_echoes(struct tty_struct *tty) tail++; } + not_yet_stored: ldata->echo_tail = tail; return old_space - space; } @@ -752,6 +762,7 @@ static void commit_echoes(struct tty_struct *tty) size_t nr, old, echoed; size_t head; + mutex_lock(&ldata->output_lock); head = ldata->echo_head; ldata->echo_mark = head; old = ldata->echo_commit - ldata->echo_tail; @@ -760,10 +771,12 @@ static void commit_echoes(struct tty_struct *tty) * is over the threshold (and try again each time another * block is accumulated) */ nr = head - ldata->echo_tail; - if (nr < ECHO_COMMIT_WATERMARK || (nr % ECHO_BLOCK > old % ECHO_BLOCK)) + if (nr < ECHO_COMMIT_WATERMARK || + (nr % ECHO_BLOCK > old % ECHO_BLOCK)) { + mutex_unlock(&ldata->output_lock); return; + } - mutex_lock(&ldata->output_lock); ldata->echo_commit = head; echoed = __process_echoes(tty); mutex_unlock(&ldata->output_lock); @@ -814,7 +827,9 @@ static void flush_echoes(struct tty_struct *tty) static inline void add_echo_byte(unsigned char c, struct n_tty_data *ldata) { - *echo_buf_addr(ldata, ldata->echo_head++) = c; + *echo_buf_addr(ldata, ldata->echo_head) = c; + smp_wmb(); /* Matches smp_rmb() in echo_buf(). */ + ldata->echo_head++; } /** @@ -1883,30 +1898,21 @@ static int n_tty_open(struct tty_struct *tty) struct n_tty_data *ldata; /* Currently a malloc failure here can panic */ - ldata = vmalloc(sizeof(*ldata)); + ldata = vzalloc(sizeof(*ldata)); if (!ldata) - goto err; + return -ENOMEM; ldata->overrun_time = jiffies; mutex_init(&ldata->atomic_read_lock); mutex_init(&ldata->output_lock); tty->disc_data = ldata; - reset_buffer_flags(tty->disc_data); - ldata->column = 0; - ldata->canon_column = 0; - ldata->num_overrun = 0; - ldata->no_room = 0; - ldata->lnext = 0; tty->closing = 0; /* indicate buffer work may resume */ clear_bit(TTY_LDISC_HALTED, &tty->flags); n_tty_set_termios(tty, NULL); tty_unthrottle(tty); - return 0; -err: - return -ENOMEM; } static inline int input_available_p(struct tty_struct *tty, int poll) -- GitLab From 2a7a8556b3b4d56e60b7e2edc57754e701ddb788 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 11 Jun 2018 11:06:53 -0700 Subject: [PATCH 0011/1291] staging: android: ion: Return an ERR_PTR in ion_map_kernel commit 0a2bc00341dcfcc793c0dbf4f8d43adf60458b05 upstream. The expected return value from ion_map_kernel is an ERR_PTR. The error path for a vmalloc failure currently just returns NULL, triggering a warning in ion_buffer_kmap_get. Encode the vmalloc failure as an ERR_PTR. Reported-by: syzbot+55b1d9f811650de944c6@syzkaller.appspotmail.com Signed-off-by: Laura Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 91faa7f035b9..babbd94c32d9 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -38,7 +38,7 @@ void *ion_heap_map_kernel(struct ion_heap *heap, struct page **tmp = pages; if (!pages) - return NULL; + return ERR_PTR(-ENOMEM); if (buffer->flags & ION_FLAG_CACHED) pgprot = PAGE_KERNEL; -- GitLab From 3ff8e558ba7b6f7fce1ea627c797dc03240bc40f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Jun 2018 21:00:41 +0300 Subject: [PATCH 0012/1291] serial: 8250_pci: Remove stalled entries in blacklist commit 20dcff436e9fcd2e106b0ccc48a52206bc176d70 upstream. After the commit 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") pure serial multi-port cards, such as CH355, got blacklisted and thus not being enumerated anymore. Previously, it seems, blacklisting them was on purpose to shut up pciserial_init_one() about record duplication. So, remove the entries from blacklist in order to get cards enumerated. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Reported-by: Matt Turner Cc: Sergej Pupykin Cc: Alexandr Petrenko Signed-off-by: Andy Shevchenko Reviewed-and-Tested-by: Matt Turner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 0d814a87acb2..4986b4aebe80 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3345,9 +3345,7 @@ static const struct pci_device_id blacklist[] = { /* multi-io cards handled by parport_serial */ { PCI_DEVICE(0x4348, 0x7053), }, /* WCH CH353 2S1P */ { PCI_DEVICE(0x4348, 0x5053), }, /* WCH CH353 1S1P */ - { PCI_DEVICE(0x4348, 0x7173), }, /* WCH CH355 4S */ { PCI_DEVICE(0x1c00, 0x3250), }, /* WCH CH382 2S1P */ - { PCI_DEVICE(0x1c00, 0x3470), }, /* WCH CH384 4S */ /* Moxa Smartio MUE boards handled by 8250_moxa */ { PCI_VDEVICE(MOXA, 0x1024), }, -- GitLab From b124a1c182fa933f579281cc650f60ac47c7026d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Jun 2018 17:08:59 +0200 Subject: [PATCH 0013/1291] serdev: fix memleak on module unload commit bc6cf3669d22371f573ab0305b3abf13887c0786 upstream. Make sure to free all resources associated with the ida on module exit. Fixes: cd6484e1830b ("serdev: Introduce new bus for serial attached devices") Cc: stable # 4.11 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 97db76afced2..ae2564ecddcd 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -482,6 +482,7 @@ EXPORT_SYMBOL_GPL(__serdev_device_driver_register); static void __exit serdev_exit(void) { bus_unregister(&serdev_bus_type); + ida_destroy(&ctrl_ida); } module_exit(serdev_exit); -- GitLab From b141de45e2dc73d6997e6bf7b8347b688bc7c5f7 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 14 Jun 2018 12:23:09 +0200 Subject: [PATCH 0014/1291] vt: prevent leaking uninitialized data to userspace via /dev/vcs* commit 21eff69aaaa0e766ca0ce445b477698dc6a9f55a upstream. KMSAN reported an infoleak when reading from /dev/vcs*: BUG: KMSAN: kernel-infoleak in vcs_read+0x18ba/0x1cc0 Call Trace: ... kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253 copy_to_user ./include/linux/uaccess.h:184 vcs_read+0x18ba/0x1cc0 drivers/tty/vt/vc_screen.c:352 __vfs_read+0x1b2/0x9d0 fs/read_write.c:416 vfs_read+0x36c/0x6b0 fs/read_write.c:452 ... Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 __kmalloc+0x13a/0x350 mm/slub.c:3818 kmalloc ./include/linux/slab.h:517 vc_allocate+0x438/0x800 drivers/tty/vt/vt.c:787 con_install+0x8c/0x640 drivers/tty/vt/vt.c:2880 tty_driver_install_tty drivers/tty/tty_io.c:1224 tty_init_dev+0x1b5/0x1020 drivers/tty/tty_io.c:1324 tty_open_by_driver drivers/tty/tty_io.c:1959 tty_open+0x17b4/0x2ed0 drivers/tty/tty_io.c:2007 chrdev_open+0xc25/0xd90 fs/char_dev.c:417 do_dentry_open+0xccc/0x1440 fs/open.c:794 vfs_open+0x1b6/0x2f0 fs/open.c:908 ... Bytes 0-79 of 240 are uninitialized Consistently allocating |vc_screenbuf| with kzalloc() fixes the problem Reported-by: syzbot+17a8efdf800000@syzkaller.appspotmail.com Signed-off-by: Alexander Potapenko Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index de67abbda921..e77421e7bf46 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -782,7 +782,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ if (!*vc->vc_uni_pagedir_loc) con_set_default_unimap(vc); - vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL); + vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) goto err_free; @@ -869,7 +869,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_screen_size > (4 << 20)) return -EINVAL; - newscreen = kmalloc(new_screen_size, GFP_USER); + newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; -- GitLab From ce686c42476ec140756d3f1f16cfe8251f8d7520 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Apr 2018 17:17:22 +0800 Subject: [PATCH 0015/1291] drm/amdgpu: Add APU support in vi_set_uvd_clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 819a23f83e3b2513cffbef418458a47ca02c36b3 upstream. fix the issue set uvd clock failed on CZ/ST which lead 1s delay when boot up. Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Acked-by: Christian König Acked-by: Shirish S Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/vi.c | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4968b6bb9466..81b0bd7e79a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -729,33 +729,57 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, return r; tmp = RREG32_SMC(cntl_reg); - tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | - CG_DCLK_CNTL__DCLK_DIVIDER_MASK); + + if (adev->flags & AMD_IS_APU) + tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK; + else + tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | + CG_DCLK_CNTL__DCLK_DIVIDER_MASK); tmp |= dividers.post_divider; WREG32_SMC(cntl_reg, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) - break; + tmp = RREG32_SMC(status_reg); + if (adev->flags & AMD_IS_APU) { + if (tmp & 0x10000) + break; + } else { + if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK) + break; + } mdelay(10); } if (i == 100) return -ETIMEDOUT; - return 0; } +#define ixGNB_CLK1_DFS_CNTL 0xD82200F0 +#define ixGNB_CLK1_STATUS 0xD822010C +#define ixGNB_CLK2_DFS_CNTL 0xD8220110 +#define ixGNB_CLK2_STATUS 0xD822012C + static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { int r; - r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); - if (r) - return r; + if (adev->flags & AMD_IS_APU) { + r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS); + if (r) + return r; - r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); - if (r) - return r; + r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS); + if (r) + return r; + } else { + r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); + if (r) + return r; + + r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + if (r) + return r; + } return 0; } -- GitLab From 40e2064b8fc7025cf6def10ffc5d4ebc5dc8837e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Apr 2018 17:49:56 +0800 Subject: [PATCH 0016/1291] drm/amdgpu: Add APU support in vi_set_vce_clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 08ebb6e9f4fd7098c28e0ebbb42847cf0488ebb8 upstream. 1. fix set vce clocks failed on Cz/St which lead 1s delay when boot up. 2. remove the workaround in vce_v3_0.c Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Acked-by: Christian König Acked-by: Shirish S Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vi.c | 31 +++++++++++++++++++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index cf81065e3c5a..5183b46563f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -467,8 +467,8 @@ static int vce_v3_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; vce_v3_0_override_vce_clock_gating(adev, true); - if (!(adev->flags & AMD_IS_APU)) - amdgpu_asic_set_vce_clocks(adev, 10000, 10000); + + amdgpu_asic_set_vce_clocks(adev, 10000, 10000); for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 81b0bd7e79a9..0327e0a6802b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -758,6 +758,8 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, #define ixGNB_CLK1_STATUS 0xD822010C #define ixGNB_CLK2_DFS_CNTL 0xD8220110 #define ixGNB_CLK2_STATUS 0xD822012C +#define ixGNB_CLK3_DFS_CNTL 0xD8220130 +#define ixGNB_CLK3_STATUS 0xD822014C static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { @@ -789,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) int r, i; struct atom_clock_dividers dividers; u32 tmp; + u32 reg_ctrl; + u32 reg_status; + u32 status_mask; + u32 reg_mask; + + if (adev->flags & AMD_IS_APU) { + reg_ctrl = ixGNB_CLK3_DFS_CNTL; + reg_status = ixGNB_CLK3_STATUS; + status_mask = 0x00010000; + reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } else { + reg_ctrl = ixCG_ECLK_CNTL; + reg_status = ixCG_ECLK_STATUS; + status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK; + reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } r = amdgpu_atombios_get_clock_dividers(adev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, @@ -797,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) return r; for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; - tmp = RREG32_SMC(ixCG_ECLK_CNTL); - tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | - CG_ECLK_CNTL__ECLK_DIVIDER_MASK); + tmp = RREG32_SMC(reg_ctrl); + tmp &= ~reg_mask; tmp |= dividers.post_divider; - WREG32_SMC(ixCG_ECLK_CNTL, tmp); + WREG32_SMC(reg_ctrl, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; -- GitLab From dd19ea36f5963a7160dbc4526455d2fb4d9cb516 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 23 May 2018 11:18:43 +0800 Subject: [PATCH 0017/1291] drm/amdgpu: fix the missed vcn fw version report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a0b2ac29415bb44d1c212184c1385a1abe68db5c upstream. It missed vcn.fw_version setting when init vcn microcode, and it will be used to report vcn ucode version via amdgpu_firmware_info sysfs interface. Signed-off-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 041e0121590c..308a9755eae3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -85,6 +85,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; -- GitLab From 7d0ed747bc56ca05868423220c62e64ea525aad7 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 1 Jun 2018 16:05:32 -0400 Subject: [PATCH 0018/1291] drm/qxl: Call qxl_bo_unref outside atomic context commit 889ad63d41eea20184b0483e7e585e5b20fb6cfe upstream. "qxl_bo_unref" may sleep, but calling "qxl_release_map" causes "preempt_disable()" to be called and "preempt_enable()" isn't called until "qxl_release_unmap" is used. Move the call to "qxl_bo_unref" out from in between the two to avoid sleeping from an atomic context. This issue can be demonstrated on a kernel with CONFIG_LOCKDEP=y by creating a VM using QXL, using a desktop environment using Xorg, then moving the cursor on or off a window. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1571128 Fixes: 9428088c90b6 ("drm/qxl: reapply cursor after resetting primary") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Link: http://patchwork.freedesktop.org/patch/msgid/20180601200532.13619-1-jcline@redhat.com Signed-off-by: Gerd Hoffmann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 9a9214ae0fb5..573bab222123 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -630,7 +630,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, struct qxl_cursor_cmd *cmd; struct qxl_cursor *cursor; struct drm_gem_object *obj; - struct qxl_bo *cursor_bo = NULL, *user_bo = NULL; + struct qxl_bo *cursor_bo = NULL, *user_bo = NULL, *old_cursor_bo = NULL; int ret; void *user_ptr; int size = 64*64*4; @@ -684,7 +684,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, cursor_bo, 0); cmd->type = QXL_CURSOR_SET; - qxl_bo_unref(&qcrtc->cursor_bo); + old_cursor_bo = qcrtc->cursor_bo; qcrtc->cursor_bo = cursor_bo; cursor_bo = NULL; } else { @@ -704,6 +704,9 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + if (old_cursor_bo) + qxl_bo_unref(&old_cursor_bo); + qxl_bo_unref(&cursor_bo); return; -- GitLab From af597bb61370e337eec93ae655625d3dbd1fc0bb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 10:48:22 +0200 Subject: [PATCH 0019/1291] drm/atmel-hlcdc: check stride values in the first plane commit 9fcf2b3c1c0276650fea537c71b513d27d929b05 upstream. The statement always evaluates to true since the struct fields are arrays. This has shown up as a warning when compiling with clang: warning: address of array 'desc->layout.xstride' will always evaluate to 'true' [-Wpointer-bool-conversion] Check for values in the first plane instead. Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support") Cc: Signed-off-by: Stefan Agner Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20180617084826.31885-1-stefan@agner.ch Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 703c2d13603f..eb7c4cf19bf6 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -889,7 +889,7 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, drm_object_attach_property(&plane->base.base, props->alpha, 255); - if (desc->layout.xstride && desc->layout.pstride) { + if (desc->layout.xstride[0] && desc->layout.pstride[0]) { int ret; ret = drm_plane_create_rotation_property(&plane->base, -- GitLab From a1bf87cfb6ea83a262d218b2bc123a11b949caaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 8 Jun 2018 12:58:15 +0200 Subject: [PATCH 0020/1291] drm/amdgpu: Use kvmalloc_array for allocating VRAM manager nodes array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6fa39bc1e01dab8b4f54b23e95a181a2ed5a2d38 upstream. It can be quite big, and there's no need for it to be physically contiguous. This is less likely to fail under memory pressure (has actually happened while running piglit). Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 26e900627971..2902f91bb0ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -140,7 +140,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } - nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL); + nodes = kvmalloc_array(num_nodes, sizeof(*nodes), + GFP_KERNEL | __GFP_ZERO); if (!nodes) return -ENOMEM; @@ -195,7 +196,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, drm_mm_remove_node(&nodes[i]); spin_unlock(&mgr->lock); - kfree(nodes); + kvfree(nodes); return r == -ENOSPC ? 0 : r; } @@ -234,7 +235,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, atomic64_sub(usage, &mgr->usage); atomic64_sub(vis_usage, &mgr->vis_usage); - kfree(mem->mm_node); + kvfree(mem->mm_node); mem->mm_node = NULL; } -- GitLab From 78e7000fe63f32058e913daf0c9fa2cff23ea206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 12 Jun 2018 12:07:33 +0200 Subject: [PATCH 0021/1291] drm/amdgpu: Refactor amdgpu_vram_mgr_bo_invisible_size helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e9244ff585239630f15f8ad8e676bc91a94ca9e upstream. Preparation for the following fix, no functional change intended. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4d08957d2108..1360a24d2ede 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -747,8 +747,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, } if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size += amdgpu_bo_size(bo); + adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo); } else if (domain == AMDGPU_GEM_DOMAIN_GTT) { adev->gart_pin_size += amdgpu_bo_size(bo); } @@ -786,8 +785,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { adev->vram_pin_size -= amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size -= amdgpu_bo_size(bo); + adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo); } else if (bo->tbo.mem.mem_type == TTM_PL_TT) { adev->gart_pin_size -= amdgpu_bo_size(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 43093bffa2cf..557829a84778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -64,6 +64,7 @@ extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); +u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 2902f91bb0ce..86d8a961518e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -101,6 +101,22 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, adev->mc.visible_vram_size : end) - start; } +/** + * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size + * + * @bo: &amdgpu_bo buffer object (must be in VRAM) + * + * Returns: + * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM. + */ +u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo) +{ + if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + return amdgpu_bo_size(bo); + + return 0; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * -- GitLab From 1d795d1241d3269bd44d4ef8cfb4da3480ab744c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 Jun 2018 20:06:05 +0100 Subject: [PATCH 0022/1291] drm/i915: Enable provoking vertex fix on Gen9 systems. commit 7a3727f385dc64773db1c144f6b15c1e9d4735bb upstream. The SF and clipper units mishandle the provoking vertex in some cases, which can cause misrendering with shaders that use flat shaded inputs. There are chicken bits in 3D_CHICKEN3 (for SF) and FF_SLICE_CHICKEN (for the clipper) that work around the issue. These registers are unfortunately not part of the logical context (even the power context), and so we must reload them every time we start executing in a context. Bugzilla: https://bugs.freedesktop.org/103047 Signed-off-by: Kenneth Graunke Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180615190605.16238-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Cc: stable@vger.kernel.org (cherry picked from commit b77422f80337d363eed60c8c48db9cb6e33085c9) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 61a2203b75df..be813b2738c1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2484,12 +2484,17 @@ enum i915_power_well_id { #define _3D_CHICKEN _MMIO(0x2084) #define _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB (1 << 10) #define _3D_CHICKEN2 _MMIO(0x208c) + +#define FF_SLICE_CHICKEN _MMIO(0x2088) +#define FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX (1 << 1) + /* Disables pipelining of read flushes past the SF-WIZ interface. * Required on all Ironlake steppings according to the B-Spec, but the * particular danger of not doing so is not specified. */ # define _3D_CHICKEN2_WM_READ_PIPELINED (1 << 14) #define _3D_CHICKEN3 _MMIO(0x2090) +#define _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX (1 << 12) #define _3D_CHICKEN_SF_DISABLE_OBJEND_CULL (1 << 10) #define _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL (1 << 5) #define _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x) ((x)<<1) /* gen8+ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f972e6ec663..d638b641b760 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1067,11 +1067,21 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); + *batch++ = MI_LOAD_REGISTER_IMM(3); + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - *batch++ = MI_LOAD_REGISTER_IMM(1); *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); *batch++ = _MASKED_BIT_DISABLE( GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + + /* BSpec: 11391 */ + *batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN); + *batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX); + + /* BSpec: 11299 */ + *batch++ = i915_mmio_reg_offset(_3D_CHICKEN3); + *batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ -- GitLab From 1ea5ed0cadcae75f733d7bd870fea90d05f42528 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 May 2018 14:07:42 +0200 Subject: [PATCH 0023/1291] netfilter: nf_tables: nft_compat: fix refcount leak on xt module commit b8e9dc1c75714ceb53615743e1036f76e00f5a17 upstream. Taehee Yoo reported following bug: iptables-compat -I OUTPUT -m cpu --cpu 0 iptables-compat -F lsmod |grep xt_cpu xt_cpu 16384 1 Quote: "When above command is given, a netlink message has two expressions that are the cpu compat and the nft_counter. The nft_expr_type_get() in the nf_tables_expr_parse() successes first expression then, calls select_ops callback. (allocates memory and holds module) But, second nft_expr_type_get() in the nf_tables_expr_parse() returns -EAGAIN because of request_module(). In that point, by the 'goto err1', the 'module_put(info[i].ops->type->owner)' is called. There is no release routine." The core problem is that unlike all other expression, nft_compat select_ops has side effects. 1. it allocates dynamic memory which holds an nft ops struct. In all other expressions, ops has static storage duration. 2. It grabs references to the xt module that it is supposed to invoke. Depending on where things go wrong, error unwinding doesn't always do the right thing. In the above scenario, a new nft_compat_expr is created and xt_cpu module gets loaded with a refcount of 1. Due to to -EAGAIN, the netlink messages get re-parsed. When that happens, nft_compat finds that xt_cpu is already present and increments module refcount again. This fixes the problem by making select_ops to have no visible side effects and removes all extra module_get/put. When select_ops creates a new nft_compat expression, the new expression has a refcount of 0, and the xt module gets its refcount incremented. When error happens, the next call finds existing entry, but will no longer increase the reference count -- the presence of existing nft_xt means we already hold a module reference. Because nft_xt_put is only called from nft_compat destroy hook, it will never see the initial zero reference count. ->destroy can only be called after ->init(), and that will increase the refcount. Lastly, we now free nft_xt struct with kfree_rcu. Else, we get use-after free in nf_tables_rule_destroy: while (expr != nft_expr_last(rule) && expr->ops) { nf_tables_expr_destroy(ctx, expr); expr = nft_expr_next(expr); // here nft_expr_next() dereferences expr->ops. This is safe for all users, as ops have static storage duration. In nft_compat case however, its ->destroy callback can free the memory that hold the ops structure. Tested-by: Taehee Yoo Reported-by: Taehee Yoo Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 92 ++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b89f4f65b2a0..4e3030113c7a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,14 +27,24 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + + /* Unlike other expressions, ops doesn't have static storage duration. + * nft core assumes they do. We use kfree_rcu so that nft core can + * can check expr->ops->size even after nft_compat->destroy() frees + * the nft_xt struct that holds the ops structure. + */ + struct rcu_head rcu_head; }; -static void nft_xt_put(struct nft_xt *xt) +static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { list_del(&xt->head); - kfree(xt); + kfree_rcu(xt, rcu_head); + return true; } + + return false; } static int nft_compat_chain_validate_dependency(const char *tablename, @@ -226,6 +236,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -236,25 +247,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; /* The standard target cannot be used */ - if (target->target == NULL) { - ret = -EINVAL; - goto err; - } + if (!target->target) + return -EINVAL; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(target->me); - return ret; } static void @@ -271,8 +279,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(target->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(target->me); } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -411,6 +419,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -421,19 +430,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); ret = xt_check_match(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(match->me); - return ret; } static void @@ -450,8 +458,8 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(match->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(match->me); } static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -654,13 +662,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) { - if (!try_module_get(match->me)) - return ERR_PTR(-ENOENT); - - nft_match->refcnt++; + if (nft_match_cmp(match, mt_name, rev, family)) return &nft_match->ops; - } } match = xt_request_find_match(family, mt_name, rev); @@ -679,7 +682,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 1; + nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; @@ -739,13 +742,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (nft_target_cmp(target, tg_name, rev, family)) { - if (!try_module_get(target->me)) - return ERR_PTR(-ENOENT); - - nft_target->refcnt++; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; - } } target = xt_request_find_target(family, tg_name, rev); @@ -764,7 +762,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 1; + nft_target->refcnt = 0; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -825,6 +823,32 @@ static int __init nft_compat_module_init(void) static void __exit nft_compat_module_exit(void) { + struct nft_xt *xt, *next; + + /* list should be empty here, it can be non-empty only in case there + * was an error that caused nft_xt expr to not be initialized fully + * and noone else requested the same expression later. + * + * In this case, the lists contain 0-refcount entries that still + * hold module reference. + */ + list_for_each_entry_safe(xt, next, &nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); -- GitLab From ea200cdd605662de24f7cd92e03aa6a335e0b4da Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2018 15:22:35 +0200 Subject: [PATCH 0024/1291] netfilter: nft_compat: prepare for indirect info storage commit 8bdf164744b2c7f63561846c01cff3db597f282d upstream. Next patch will make it possible for *info to be stored in a separate allocation instead of the expr private area. This removes the 'expr priv area is info blob' assumption from the match init/destroy/eval functions. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 47 +++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 4e3030113c7a..025eb37bd5c5 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -324,11 +324,11 @@ static int nft_target_validate(const struct nft_ctx *ctx, return 0; } -static void nft_match_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void __nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct sk_buff *skb = pkt->skb; bool ret; @@ -352,6 +352,13 @@ static void nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr)); +} + static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, [NFTA_MATCH_REV] = { .type = NLA_U32 }, @@ -412,10 +419,10 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out) } static int -nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[], + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); @@ -444,11 +451,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); +} + static void -nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, + void *info) { struct xt_match *match = expr->ops->data; - void *info = nft_expr_priv(expr); struct xt_mtdtor_param par; par.net = ctx->net; @@ -462,9 +476,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(match->me); } -static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void +nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); +} + +static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || @@ -478,6 +498,11 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + return __nft_match_dump(skb, expr, nft_expr_priv(expr)); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) -- GitLab From 365e73e07fba4c343510adf49d18cb2b97337df8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2018 15:22:36 +0200 Subject: [PATCH 0025/1291] netfilter: nft_compat: fix handling of large matchinfo size commit 732a8049f365f514d0607e03938491bf6cb0d620 upstream. currently matchinfo gets stored in the expression, but some xt matches are very large. To handle those we either need to switch nft core to kvmalloc and increase size limit, or allocate the info blob of large matches separately. This does the latter, this limits the scope of the changes to nft_compat. I picked a threshold of 192, this allows most matches to work as before and handle only few ones via separate alloation (cgroup, u32, sctp, rt). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 64 +++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 025eb37bd5c5..3bd637eadc42 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -36,6 +36,13 @@ struct nft_xt { struct rcu_head rcu_head; }; +/* Used for matches where *info is larger than X byte */ +#define NFT_MATCH_LARGE_THRESH 192 + +struct nft_xt_match_priv { + void *info; +}; + static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { @@ -352,6 +359,15 @@ static void __nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_large_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_eval(expr, regs, pkt, priv->info); +} + static void nft_match_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -458,6 +474,24 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); } +static int +nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + struct xt_match *m = expr->ops->data; + int ret; + + priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL); + if (!priv->info) + return -ENOMEM; + + ret = __nft_match_init(ctx, expr, tb, priv->info); + if (ret) + kfree(priv->info); + return ret; +} + static void __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) @@ -482,6 +516,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } +static void +nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_destroy(ctx, expr, priv->info); + kfree(priv->info); +} + static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, void *info) { @@ -503,6 +546,13 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) return __nft_match_dump(skb, expr, nft_expr_priv(expr)); } +static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(e); + + return __nft_match_dump(skb, e, priv->info); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -670,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, { struct nft_xt *nft_match; struct xt_match *match; + unsigned int matchsize; char *mt_name; u32 rev, family; int err; @@ -709,7 +760,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -717,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; + matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); + if (matchsize > NFT_MATCH_LARGE_THRESH) { + matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv)); + + nft_match->ops.eval = nft_match_large_eval; + nft_match->ops.init = nft_match_large_init; + nft_match->ops.destroy = nft_match_large_destroy; + nft_match->ops.dump = nft_match_large_dump; + } + + nft_match->ops.size = matchsize; + list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; -- GitLab From 4ae6a7afae5362289cc99aa80f7d6229521888c3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 6 May 2018 00:47:20 +0200 Subject: [PATCH 0026/1291] netfilter: nf_tables: don't assume chain stats are set when jumplabel is set commit 009240940e84c1c089af88b454f7e804a4c5bd1b upstream. nft_chain_stats_replace() and all other spots assume ->stats can be NULL, but nft_update_chain_stats does not. It must do this check, just because the jump label is set doesn't mean all basechains have stats assigned. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index dfd0bf3810d2..942702a2776f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { + struct nft_base_chain *base_chain; struct nft_stats *stats; - local_bh_disable(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); - local_bh_enable(); + base_chain = nft_base_chain(chain); + if (!base_chain->stats) + return; + + stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); + if (stats) { + local_bh_disable(); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + local_bh_enable(); + } } struct nft_jumpstack { -- GitLab From 2b93cb2861dedfc43d7eb82c37c4eafbc385fef8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 May 2018 02:43:57 +0200 Subject: [PATCH 0027/1291] netfilter: nf_tables: bogus EBUSY in chain deletions commit bb7b40aecbf778c0c83a5bd62b0f03ca9f49a618 upstream. When removing a rule that jumps to chain and such chain in the same batch, this bogusly hits EBUSY. Add activate and deactivate operations to expression that can be called from the preparation and the commit/abort phases. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 5 ++++ net/netfilter/nf_tables_api.c | 46 ++++++++++++++++++++++++++++--- net/netfilter/nft_immediate.c | 15 ++++++++-- 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 079c69cae2f6..59a4f50ffe8d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -177,6 +177,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); +void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -731,6 +732,10 @@ struct nft_expr_ops { int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); + void (*activate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*deactivate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cf30c440f7a7..8a2027d7aaa3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -220,6 +220,34 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } +static void nft_rule_expr_activate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->activate) + expr->ops->activate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + +static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->deactivate) + expr->ops->deactivate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { @@ -265,6 +293,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } + nft_rule_expr_deactivate(ctx, rule); return 0; } @@ -2218,6 +2247,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } +static void nf_tables_rule_release(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + nft_rule_expr_deactivate(ctx, rule); + nf_tables_rule_destroy(ctx, rule); +} + #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -2385,7 +2421,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) @@ -4054,7 +4090,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, * NFT_GOTO verdicts. This function must be called on active data objects * from the second phase of the commit protocol. */ -static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -5221,10 +5257,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; nft_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -5798,7 +5836,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; - nf_tables_rule_destroy(ctx, rule); + nf_tables_rule_release(ctx, rule); } list_del(&ctx->chain->list); ctx->table->use--; @@ -5832,7 +5870,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); chain->use--; - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(set, ns, &table->sets, list) { diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 4717d7796927..aa87ff8beae8 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -69,8 +69,16 @@ static int nft_immediate_init(const struct nft_ctx *ctx, return err; } -static void nft_immediate_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_immediate_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static void nft_immediate_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); @@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), .eval = nft_immediate_eval, .init = nft_immediate_init, - .destroy = nft_immediate_destroy, + .activate = nft_immediate_activate, + .deactivate = nft_immediate_deactivate, .dump = nft_immediate_dump, .validate = nft_immediate_validate, }; -- GitLab From 491b1a866e4ad2f5b6f5d60a23e16ae77ec37d5c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 17 May 2018 22:49:49 +0900 Subject: [PATCH 0028/1291] netfilter: nft_meta: fix wrong value dereference in nft_meta_set_eval commit 97a0549b15a0b466c47f6a0143a490a082c64b4e upstream. In the nft_meta_set_eval, nftrace value is dereferenced as u32 from sreg. But correct type is u8. so that sometimes incorrect value is dereferenced. Steps to reproduce: %nft add table ip filter %nft add chain ip filter input { type filter hook input priority 4\; } %nft add rule ip filter input nftrace set 0 %nft monitor Sometimes, we can see trace messages. trace id 16767227 ip filter input packet: iif "enp2s0" ether saddr xx:xx:xx:xx:xx:xx ether daddr xx:xx:xx:xx:xx:xx ip saddr 192.168.0.1 ip daddr 255.255.255.255 ip dscp cs0 ip ecn not-ect ip trace id 16767227 ip filter input rule nftrace set 0 (verdict continue) trace id 16767227 ip filter input verdict continue trace id 16767227 ip filter input Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_meta.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5a60eb23a7ed..c71184d4eac1 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -229,7 +229,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 *sreg = ®s->data[meta->sreg]; u32 value = *sreg; - u8 pkt_type; + u8 value8; switch (meta->key) { case NFT_META_MARK: @@ -239,15 +239,17 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - pkt_type = nft_reg_load8(sreg); + value8 = nft_reg_load8(sreg); - if (skb->pkt_type != pkt_type && - skb_pkt_type_ok(pkt_type) && + if (skb->pkt_type != value8 && + skb_pkt_type_ok(value8) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = pkt_type; + skb->pkt_type = value8; break; case NFT_META_NFTRACE: - skb->nf_trace = !!value; + value8 = nft_reg_load8(sreg); + + skb->nf_trace = !!value8; break; default: WARN_ON(1); -- GitLab From d3a9b8a511812fce32be5253e8ef2a3d54b34b4d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 27 May 2018 21:08:13 +0200 Subject: [PATCH 0029/1291] netfilter: nf_tables: disable preemption in nft_update_chain_stats() commit ad9d9e85072b668731f356be0a3750a3ba22a607 upstream. This patch fixes the following splat. [118709.054937] BUG: using smp_processor_id() in preemptible [00000000] code: test/1571 [118709.054970] caller is nft_update_chain_stats.isra.4+0x53/0x97 [nf_tables] [118709.054980] CPU: 2 PID: 1571 Comm: test Not tainted 4.17.0-rc6+ #335 [...] [118709.054992] Call Trace: [118709.055011] dump_stack+0x5f/0x86 [118709.055026] check_preemption_disabled+0xd4/0xe4 Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 942702a2776f..40e744572283 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -126,15 +126,15 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, if (!base_chain->stats) return; + local_bh_disable(); stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); if (stats) { - local_bh_disable(); u64_stats_update_begin(&stats->syncp); stats->pkts++; stats->bytes += pkt->skb->len; u64_stats_update_end(&stats->syncp); - local_bh_enable(); } + local_bh_enable(); } struct nft_jumpstack { -- GitLab From 174757e28b7bc261f8ddd12467786f286717a108 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 29 May 2018 01:14:12 +0900 Subject: [PATCH 0030/1291] netfilter: nf_tables: increase nft_counters_enabled in nft_chain_stats_replace() commit bbb8c61f97e3a2dd91b30d3e57b7964a67569d11 upstream. When a chain is updated, a counter can be attached. if so, the nft_counters_enabled should be increased. test commands: %nft add table ip filter %nft add chain ip filter input { type filter hook input priority 4\; } %iptables-compat -Z input %nft delete chain ip filter input we can see below messages. [ 286.443720] jump label: negative count! [ 286.448278] WARNING: CPU: 0 PID: 1459 at kernel/jump_label.c:197 __static_key_slow_dec_cpuslocked+0x6f/0xf0 [ 286.449144] Modules linked in: nf_tables nfnetlink ip_tables x_tables [ 286.449144] CPU: 0 PID: 1459 Comm: nft Tainted: G W 4.17.0-rc2+ #12 [ 286.449144] RIP: 0010:__static_key_slow_dec_cpuslocked+0x6f/0xf0 [ 286.449144] RSP: 0018:ffff88010e5176f0 EFLAGS: 00010286 [ 286.449144] RAX: 000000000000001b RBX: ffffffffc0179500 RCX: ffffffffb8a82522 [ 286.449144] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88011b7e5eac [ 286.449144] RBP: 0000000000000000 R08: ffffed00236fce5c R09: ffffed00236fce5b [ 286.449144] R10: ffffffffc0179503 R11: ffffed00236fce5c R12: 0000000000000000 [ 286.449144] R13: ffff88011a28e448 R14: ffff88011a28e470 R15: dffffc0000000000 [ 286.449144] FS: 00007f0384328700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 286.449144] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.449144] CR2: 00007f038394bf10 CR3: 0000000104a86000 CR4: 00000000001006f0 [ 286.449144] Call Trace: [ 286.449144] static_key_slow_dec+0x6a/0x70 [ 286.449144] nf_tables_chain_destroy+0x19d/0x210 [nf_tables] [ 286.449144] nf_tables_commit+0x1891/0x1c50 [nf_tables] [ 286.449144] nfnetlink_rcv+0x1148/0x13d0 [nfnetlink] [ ... ] Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8a2027d7aaa3..f70c5329ee6a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1266,8 +1266,10 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); - } else + } else { rcu_assign_pointer(chain->stats, newstats); + static_branch_inc(&nft_counters_enabled); + } } static void nf_tables_chain_destroy(struct nft_chain *chain) -- GitLab From 082711fa317845b535faa0aeea7ee3ff82a08301 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 May 2018 13:22:56 +0100 Subject: [PATCH 0031/1291] netfilter: nf_tables: fix memory leak on error exit return commit f0dfd7a2b35b02030949100247d851b793cb275f upstream. Currently the -EBUSY error return path is not free'ing resources allocated earlier, leaving a memory leak. Fix this by exiting via the error exit label err5 that performs the necessary resource clean up. Detected by CoverityScan, CID#1432975 ("Resource leak") Fixes: 9744a6fcefcb ("netfilter: nf_tables: check if same extensions are set when adding elements") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f70c5329ee6a..155b8098731e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3999,8 +3999,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ - nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) - return -EBUSY; + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) { + err = -EBUSY; + goto err5; + } if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), -- GitLab From 44956f98fd8b4f235f8f40a367955f55f0477b37 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Mar 2018 15:25:37 +0100 Subject: [PATCH 0032/1291] netfilter: nf_tables: add missing netlink attrs to policies commit 467697d289e7e6e1b15910d99096c0da08c56d5b upstream. Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Fixes: f25ad2e907f1 ("netfilter: nf_tables: prepare for expressions associated to set elements") Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 155b8098731e..60936bca3181 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1978,6 +1978,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, + [NFTA_RULE_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, @@ -3412,6 +3413,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, + [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { -- GitLab From b8d8cde449fd51db0bb407dece03d36ced1acb83 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 29 May 2018 01:13:45 +0900 Subject: [PATCH 0033/1291] netfilter: nf_tables: fix NULL-ptr in nf_tables_dump_obj() commit 360cc79d9d299ce297b205508276285ceffc5fa8 upstream. The table field in nft_obj_filter is not an array. In order to check tablename, we should check if the pointer is set. Test commands: %nft add table ip filter %nft add counter ip filter ct1 %nft reset counters Splat looks like: [ 306.510504] kasan: CONFIG_KASAN_INLINE enabled [ 306.516184] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 306.524775] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 306.528284] Modules linked in: nft_objref nft_counter nf_tables nfnetlink ip_tables x_tables [ 306.528284] CPU: 0 PID: 1488 Comm: nft Not tainted 4.17.0-rc4+ #17 [ 306.528284] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [ 306.528284] RIP: 0010:nf_tables_dump_obj+0x52c/0xa70 [nf_tables] [ 306.528284] RSP: 0018:ffff8800b6cb7520 EFLAGS: 00010246 [ 306.528284] RAX: 0000000000000000 RBX: ffff8800b6c49820 RCX: 0000000000000000 [ 306.528284] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffed0016d96e9a [ 306.528284] RBP: ffff8800b6cb75c0 R08: ffffed00236fce7c R09: ffffed00236fce7b [ 306.528284] R10: ffffffff9f6241e8 R11: ffffed00236fce7c R12: ffff880111365108 [ 306.528284] R13: 0000000000000000 R14: ffff8800b6c49860 R15: ffff8800b6c49860 [ 306.528284] FS: 00007f838b007700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 306.528284] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 306.528284] CR2: 00007ffeafabcf78 CR3: 00000000b6cbe000 CR4: 00000000001006f0 [ 306.528284] Call Trace: [ 306.528284] netlink_dump+0x470/0xa20 [ 306.528284] __netlink_dump_start+0x5ae/0x690 [ 306.528284] ? nf_tables_getobj+0x1b3/0x740 [nf_tables] [ 306.528284] nf_tables_getobj+0x2f5/0x740 [nf_tables] [ 306.528284] ? nft_obj_notify+0x100/0x100 [nf_tables] [ 306.528284] ? nf_tables_getobj+0x740/0x740 [nf_tables] [ 306.528284] ? nf_tables_dump_flowtable_done+0x70/0x70 [nf_tables] [ 306.528284] ? nft_obj_notify+0x100/0x100 [nf_tables] [ 306.528284] nfnetlink_rcv_msg+0x8ff/0x932 [nfnetlink] [ 306.528284] ? nfnetlink_rcv_msg+0x216/0x932 [nfnetlink] [ 306.528284] netlink_rcv_skb+0x1c9/0x2f0 [ 306.528284] ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink] [ 306.528284] ? debug_check_no_locks_freed+0x270/0x270 [ 306.528284] ? netlink_ack+0x7a0/0x7a0 [ 306.528284] ? ns_capable_common+0x6e/0x110 [ ... ] Fixes: e46abbcc05aa8 ("netfilter: nf_tables: Allow table names of up to 255 chars") Signed-off-by: Taehee Yoo Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 60936bca3181..85b549e84104 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4614,7 +4614,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && + if (filter && filter->table && strcmp(filter->table, table->name)) goto cont; if (filter && -- GitLab From e44e4cf3a8dbfee3f6e078f254c26e5a5168c6a2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Oct 2017 12:17:16 +1100 Subject: [PATCH 0034/1291] md: always hold reconfig_mutex when calling mddev_suspend() commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream. Most often mddev_suspend() is called with reconfig_mutex held. Make this a requirement in preparation a subsequent patch. Also require reconfig_mutex to be held for mddev_resume(), partly for symmetry and partly to guarantee no races with incr/decr of mddev->suspend. Taking the mutex in r5c_disable_writeback_async() is a little tricky as this is called from a work queue via log->disable_writeback_work, and flush_work() is called on that while holding ->reconfig_mutex. If the work item hasn't run before flush_work() is called, the work function will not be able to get the mutex. So we use mddev_trylock() inside the wait_event() call, and have that abort when conf->log is set to NULL, which happens before flush_work() is called. We wait in mddev->sb_wait and ensure this is woken when any of the conditions change. This requires waking mddev->sb_wait in mddev_unlock(). This is only like to trigger extra wake_ups of threads that needn't be woken when metadata is being written, and that doesn't happen often enough that the cost would be noticeable. Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 10 ++++++++-- drivers/md/md.c | 3 +++ drivers/md/raid5-cache.c | 18 +++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 33834db7c0a0..38a2ac24428e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; - if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) + if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { + mddev_lock_nointr(&rs->md); mddev_suspend(&rs->md); + mddev_unlock(&rs->md); + } rs->md.ro = 1; } @@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target *ti) if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) + if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { + mddev_lock_nointr(mddev); mddev_resume(mddev); + mddev_unlock(mddev); + } } static struct target_type raid_target = { diff --git a/drivers/md/md.c b/drivers/md/md.c index 7143c8b9284b..757f12d49540 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) void mddev_suspend(struct mddev *mddev) { WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk); + lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->suspended++) return; synchronize_rcu(); @@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend); void mddev_resume(struct mddev *mddev) { + lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; wake_up(&mddev->sb_wait); @@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev) */ spin_lock(&pers_lock); md_wakeup_thread(mddev->thread); + wake_up(&mddev->sb_wait); spin_unlock(&pers_lock); } EXPORT_SYMBOL_GPL(mddev_unlock); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 9a340728b846..79d812717406 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(struct work_struct *work) struct r5l_log *log = container_of(work, struct r5l_log, disable_writeback_work); struct mddev *mddev = log->rdev->mddev; + struct r5conf *conf = mddev->private; + int locked = 0; if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) return; @@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(struct work_struct *work) /* wait superblock change before suspend */ wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); - - mddev_suspend(mddev); - log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; - mddev_resume(mddev); + conf->log == NULL || + (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && + (locked = mddev_trylock(mddev)))); + if (locked) { + mddev_suspend(mddev); + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; + mddev_resume(mddev); + mddev_unlock(mddev); + } } static void r5l_submit_current_io(struct r5l_log *log) @@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf) conf->log = NULL; synchronize_rcu(); + /* Ensure disable_writeback_work wakes up and exits */ + wake_up(&conf->mddev->sb_wait); flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); mempool_destroy(log->meta_pool); -- GitLab From cc091f3fbbdb117d819536d6249e34322f991899 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Oct 2017 13:46:43 +1100 Subject: [PATCH 0035/1291] md: don't call bitmap_create() while array is quiesced. commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream. bitmap_create() allocates memory with GFP_KERNEL and so can wait for IO. If called while the array is quiesced, it could wait indefinitely for write out to the array - deadlock. So call bitmap_create() before quiescing the array. Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 757f12d49540..ac27fe200ecd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd) return -ENOENT; /* cannot remove what isn't there */ err = 0; if (mddev->pers) { - mddev->pers->quiesce(mddev, 1); if (fd >= 0) { struct bitmap *bitmap; bitmap = bitmap_create(mddev, -1); + mddev->pers->quiesce(mddev, 1); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; err = bitmap_load(mddev); } else err = PTR_ERR(bitmap); - } - if (fd < 0 || err) { + if (err) { + bitmap_destroy(mddev); + fd = -1; + } + mddev->pers->quiesce(mddev, 0); + } else if (fd < 0) { + mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); - fd = -1; /* make sure to put the file */ + mddev->pers->quiesce(mddev, 0); } - mddev->pers->quiesce(mddev, 0); } if (fd < 0) { struct file *f = mddev->bitmap_info.file; @@ -6944,8 +6948,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; - mddev->pers->quiesce(mddev, 1); bitmap = bitmap_create(mddev, -1); + mddev->pers->quiesce(mddev, 1); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; rv = bitmap_load(mddev); -- GitLab From feabea21655961e6b0f87ad7351a4f99515c6b09 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Oct 2017 13:46:43 +1100 Subject: [PATCH 0036/1291] md: move suspend_hi/lo handling into core md code commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream. responding to ->suspend_lo and ->suspend_hi is similar to responding to ->suspended. It is best to wait in the common core code without incrementing ->active_io. This allows mddev_suspend()/mddev_resume() to work while requests are waiting for suspend_lo/hi to change. This is will be important after a subsequent patch which uses mddev_suspend() to synchronize updating for suspend_lo/hi. So move the code for testing suspend_lo/hi out of raid1.c and raid5.c, and place it in md.c Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 29 +++++++++++++++++++++++------ drivers/md/raid1.c | 14 +++++--------- drivers/md/raid5.c | 22 ---------------------- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ac27fe200ecd..06ad5d798d73 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock); * call has finished, the bio has been linked into some internal structure * and so is visible to ->quiesce(), so we don't need the refcount any more. */ +static bool is_suspended(struct mddev *mddev, struct bio *bio) +{ + if (mddev->suspended) + return true; + if (bio_data_dir(bio) != WRITE) + return false; + if (mddev->suspend_lo >= mddev->suspend_hi) + return false; + if (bio->bi_iter.bi_sector >= mddev->suspend_hi) + return false; + if (bio_end_sector(bio) < mddev->suspend_lo) + return false; + return true; +} + void md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: rcu_read_lock(); - if (mddev->suspended) { + if (is_suspended(mddev, bio)) { DEFINE_WAIT(__wait); for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) + if (!is_suspended(mddev, bio)) break; rcu_read_unlock(); schedule(); @@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) goto unlock; old = mddev->suspend_lo; mddev->suspend_lo = new; - if (new >= old) + if (new >= old) { /* Shrinking suspended region */ + wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 2); - else { + } else { /* Expanding suspended region - need to wait */ mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); @@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) goto unlock; old = mddev->suspend_hi; mddev->suspend_hi = new; - if (new <= old) + if (new <= old) { /* Shrinking suspended region */ + wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 2); - else { + } else { /* Expanding suspended region - need to wait */ mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e4e01d3bab81..bd5976aefb55 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1298,11 +1298,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, */ - if ((bio_end_sector(bio) > mddev->suspend_lo && - bio->bi_iter.bi_sector < mddev->suspend_hi) || - (mddev_is_clustered(mddev) && + if (mddev_is_clustered(mddev) && md_cluster_ops->area_resyncing(mddev, WRITE, - bio->bi_iter.bi_sector, bio_end_sector(bio)))) { + bio->bi_iter.bi_sector, bio_end_sector(bio))) { /* * As the suspend_* range is controlled by userspace, we want @@ -1313,12 +1311,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, sigset_t full, old; prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); - if ((bio_end_sector(bio) <= mddev->suspend_lo || - bio->bi_iter.bi_sector >= mddev->suspend_hi) && - (!mddev_is_clustered(mddev) || - !md_cluster_ops->area_resyncing(mddev, WRITE, + if (!mddev_is_clustered(mddev) || + !md_cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, - bio_end_sector(bio)))) + bio_end_sector(bio))) break; sigfillset(&full); sigprocmask(SIG_BLOCK, &full, &old); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index de1ef6264ee7..30c1dc17d5af 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) goto retry; } - if (rw == WRITE && - logical_sector >= mddev->suspend_lo && - logical_sector < mddev->suspend_hi) { - raid5_release_stripe(sh); - /* As the suspend_* range is controlled by - * userspace, we want an interruptible - * wait. - */ - prepare_to_wait(&conf->wait_for_overlap, - &w, TASK_INTERRUPTIBLE); - if (logical_sector >= mddev->suspend_lo && - logical_sector < mddev->suspend_hi) { - sigset_t full, old; - sigfillset(&full); - sigprocmask(SIG_BLOCK, &full, &old); - schedule(); - sigprocmask(SIG_SETMASK, &old, NULL); - do_prepare = true; - } - goto retry; - } - if (test_bit(STRIPE_EXPANDING, &sh->state) || !add_stripe_bio(sh, bi, dd_idx, rw, previous)) { /* Stripe is busy expanding or -- GitLab From 7c435e22453038ea29e6467be9afe05225d53de2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Oct 2017 13:46:43 +1100 Subject: [PATCH 0037/1291] md: use mddev_suspend/resume instead of ->quiesce() commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream. mddev_suspend() is a more general interface than calling ->quiesce() and is so more extensible. A future patch will make use of this. Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 06ad5d798d73..3d1650db2064 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) mddev->pers->quiesce(mddev, 2); } else { /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); + mddev_suspend(mddev); + mddev_resume(mddev); } err = 0; unlock: @@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) mddev->pers->quiesce(mddev, 2); } else { /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); + mddev_suspend(mddev); + mddev_resume(mddev); } err = 0; unlock: @@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) struct bitmap *bitmap; bitmap = bitmap_create(mddev, -1); - mddev->pers->quiesce(mddev, 1); + mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; err = bitmap_load(mddev); @@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev *mddev, int fd) bitmap_destroy(mddev); fd = -1; } - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); } else if (fd < 0) { - mddev->pers->quiesce(mddev, 1); + mddev_suspend(mddev); bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); } } if (fd < 0) { @@ -6966,7 +6966,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.space = mddev->bitmap_info.default_space; bitmap = bitmap_create(mddev, -1); - mddev->pers->quiesce(mddev, 1); + mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; rv = bitmap_load(mddev); @@ -6974,7 +6974,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) rv = PTR_ERR(bitmap); if (rv) bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); } else { /* remove the bitmap */ if (!mddev->bitmap) { @@ -6997,9 +6997,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.nodes = 0; md_cluster_ops->leave(mddev); } - mddev->pers->quiesce(mddev, 1); + mddev_suspend(mddev); bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); mddev->bitmap_info.offset = 0; } } -- GitLab From ce57466d323b224bc817bbb07791b4ca111bd53e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Oct 2017 13:46:43 +1100 Subject: [PATCH 0038/1291] md: allow metadata update while suspending. commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream. There are various deadlocks that can occur when a thread holds reconfig_mutex and calls ->quiesce(mddev, 1). As some write request block waiting for metadata to be updated (e.g. to record device failure), and as the md thread updates the metadata while the reconfig mutex is held, holding the mutex can stop write requests completing, and this prevents ->quiesce(mddev, 1) from completing. ->quiesce() is now usually called from mddev_suspend(), and it is always called with reconfig_mutex held. So at this time it is safe for the thread to update metadata without explicitly taking the lock. So add 2 new flags, one which says the unlocked updates is allowed, and one which ways it is happening. Then allow it while the quiesce completes, and then wait for it to finish. Reported-and-tested-by: Xiao Ni Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 14 ++++++++++++++ drivers/md/md.h | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3d1650db2064..b74cca273e38 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev) return; synchronize_rcu(); wake_up(&mddev->sb_wait); + set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); + smp_mb__after_atomic(); wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); mddev->pers->quiesce(mddev, 1); + clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); + wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); del_timer_sync(&mddev->safemode_timer); } @@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mddev) unlock: wake_up(&mddev->sb_wait); mddev_unlock(mddev); + } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { + /* Write superblock - thread that called mddev_suspend() + * holds reconfig_mutex for us. + */ + set_bit(MD_UPDATING_SB, &mddev->flags); + smp_mb__after_atomic(); + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) + md_update_sb(mddev, 0); + clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); + wake_up(&mddev->sb_wait); } } EXPORT_SYMBOL(md_check_recovery); diff --git a/drivers/md/md.h b/drivers/md/md.h index 9b0a896890ef..37c19b7b5df9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -237,6 +237,12 @@ enum mddev_flags { */ MD_HAS_PPL, /* The raid array has PPL feature set */ MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */ + MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update + * the metadata without taking reconfig_mutex. + */ + MD_UPDATING_SB, /* md_check_recovery is updating the metadata + * without explicitly holding reconfig_mutex. + */ }; enum mddev_sb_flags { -- GitLab From 2fc45ef962879d29f9567202e3a183fab5a7fd37 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Oct 2017 12:49:15 +1100 Subject: [PATCH 0039/1291] md: remove special meaning of ->quiesce(.., 2) commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream. The '2' argument means "wake up anything that is waiting". This is an inelegant part of the design and was added to help support management of suspend_lo/suspend_hi setting. Now that suspend_lo/hi is managed in mddev_suspend/resume, that need is gone. These is still a couple of places where we call 'quiesce' with an argument of '2', but they can safely be changed to call ->quiesce(.., 1); ->quiesce(.., 0) which achieve the same result at the small cost of pausing IO briefly. This removes a small "optimization" from suspend_{hi,lo}_store, but it isn't clear that optimization served a useful purpose. The code now is a lot clearer. Suggested-by: Shaohua Li Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-cluster.c | 6 +++--- drivers/md/md.c | 34 ++++++++++------------------------ drivers/md/md.h | 9 ++++----- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 13 +++---------- drivers/md/raid10.c | 10 +++------- drivers/md/raid5-cache.c | 12 ++++++------ drivers/md/raid5-log.h | 2 +- drivers/md/raid5.c | 18 ++++++------------ 9 files changed, 37 insertions(+), 69 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 03082e17c65c..72ce0bccc865 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -442,10 +442,11 @@ static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot) static void remove_suspend_info(struct mddev *mddev, int slot) { struct md_cluster_info *cinfo = mddev->cluster_info; + mddev->pers->quiesce(mddev, 1); spin_lock_irq(&cinfo->suspend_lock); __remove_suspend_info(cinfo, slot); spin_unlock_irq(&cinfo->suspend_lock); - mddev->pers->quiesce(mddev, 2); + mddev->pers->quiesce(mddev, 0); } @@ -492,13 +493,12 @@ static void process_suspend_info(struct mddev *mddev, s->lo = lo; s->hi = hi; mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); spin_lock_irq(&cinfo->suspend_lock); /* Remove existing entry (if exists) before adding */ __remove_suspend_info(cinfo, slot); list_add(&s->list, &cinfo->suspend_list); spin_unlock_irq(&cinfo->suspend_lock); - mddev->pers->quiesce(mddev, 2); + mddev->pers->quiesce(mddev, 0); } static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) diff --git a/drivers/md/md.c b/drivers/md/md.c index b74cca273e38..11a67eac55b1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, char *page) static ssize_t suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) { - unsigned long long old, new; + unsigned long long new; int err; err = kstrtoull(buf, 10, &new); @@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers == NULL || mddev->pers->quiesce == NULL) goto unlock; - old = mddev->suspend_lo; + mddev_suspend(mddev); mddev->suspend_lo = new; - if (new >= old) { - /* Shrinking suspended region */ - wake_up(&mddev->sb_wait); - mddev->pers->quiesce(mddev, 2); - } else { - /* Expanding suspended region - need to wait */ - mddev_suspend(mddev); - mddev_resume(mddev); - } + mddev_resume(mddev); + err = 0; unlock: mddev_unlock(mddev); @@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, char *page) static ssize_t suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) { - unsigned long long old, new; + unsigned long long new; int err; err = kstrtoull(buf, 10, &new); @@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) if (err) return err; err = -EINVAL; - if (mddev->pers == NULL || - mddev->pers->quiesce == NULL) + if (mddev->pers == NULL) goto unlock; - old = mddev->suspend_hi; + + mddev_suspend(mddev); mddev->suspend_hi = new; - if (new <= old) { - /* Shrinking suspended region */ - wake_up(&mddev->sb_wait); - mddev->pers->quiesce(mddev, 2); - } else { - /* Expanding suspended region - need to wait */ - mddev_suspend(mddev); - mddev_resume(mddev); - } + mddev_resume(mddev); + err = 0; unlock: mddev_unlock(mddev); diff --git a/drivers/md/md.h b/drivers/md/md.h index 37c19b7b5df9..11696aba94e3 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -546,12 +546,11 @@ struct md_personality int (*check_reshape) (struct mddev *mddev); int (*start_reshape) (struct mddev *mddev); void (*finish_reshape) (struct mddev *mddev); - /* quiesce moves between quiescence states - * 0 - fully active - * 1 - no new requests allowed - * others - reserved + /* quiesce suspends or resumes internal processing. + * 1 - stop new actions and wait for action io to complete + * 0 - return to normal behaviour */ - void (*quiesce) (struct mddev *mddev, int state); + void (*quiesce) (struct mddev *mddev, int quiesce); /* takeover is used to transition an array from one * personality to another. The new personality must be able * to handle the data in the current layout. diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 5a00fc118470..5ecba9eef441 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev *mddev) return ERR_PTR(-EINVAL); } -static void raid0_quiesce(struct mddev *mddev, int state) +static void raid0_quiesce(struct mddev *mddev, int quiesce) { } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index bd5976aefb55..029ecba60727 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *mddev) return 0; } -static void raid1_quiesce(struct mddev *mddev, int state) +static void raid1_quiesce(struct mddev *mddev, int quiesce) { struct r1conf *conf = mddev->private; - switch(state) { - case 2: /* wake for suspend */ - wake_up(&conf->wait_barrier); - break; - case 1: + if (quiesce) freeze_array(conf, 0); - break; - case 0: + else unfreeze_array(conf); - break; - } } static void *raid1_takeover(struct mddev *mddev) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5fb31ef52945..b20c23f970f4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *mddev, void *priv) kfree(conf); } -static void raid10_quiesce(struct mddev *mddev, int state) +static void raid10_quiesce(struct mddev *mddev, int quiesce) { struct r10conf *conf = mddev->private; - switch(state) { - case 1: + if (quiesce) raise_barrier(conf, 0); - break; - case 0: + else lower_barrier(conf); - break; - } } static int raid10_resize(struct mddev *mddev, sector_t sectors) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 79d812717406..0d535b40cb3b 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space) md_wakeup_thread(log->reclaim_thread); } -void r5l_quiesce(struct r5l_log *log, int state) +void r5l_quiesce(struct r5l_log *log, int quiesce) { struct mddev *mddev; - if (!log || state == 2) + if (!log) return; - if (state == 0) - kthread_unpark(log->reclaim_thread->tsk); - else if (state == 1) { + + if (quiesce) { /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); kthread_park(log->reclaim_thread->tsk); r5l_wake_reclaim(log, MaxSector); r5l_do_reclaim(log); - } + } else + kthread_unpark(log->reclaim_thread->tsk); } bool r5l_log_disk_error(struct r5conf *conf) diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 7f9ad5f7cda0..284578b0a349 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct r5l_log *log); extern void r5l_flush_stripe_to_raid(struct r5l_log *log); extern void r5l_stripe_write_finished(struct stripe_head *sh); extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); -extern void r5l_quiesce(struct r5l_log *log, int state); +extern void r5l_quiesce(struct r5l_log *log, int quiesce); extern bool r5l_log_disk_error(struct r5conf *conf); extern bool r5c_is_writeback(struct r5l_log *log); extern int diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 30c1dc17d5af..07ca2fd10189 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct mddev *mddev) } } -static void raid5_quiesce(struct mddev *mddev, int state) +static void raid5_quiesce(struct mddev *mddev, int quiesce) { struct r5conf *conf = mddev->private; - switch(state) { - case 2: /* resume for a suspend */ - wake_up(&conf->wait_for_overlap); - break; - - case 1: /* stop all writes */ + if (quiesce) { + /* stop all writes */ lock_all_device_hash_locks_irq(conf); /* '2' tells resync/reshape to pause so that all * active stripes can drain @@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev *mddev, int state) unlock_all_device_hash_locks_irq(conf); /* allow reshape to continue */ wake_up(&conf->wait_for_overlap); - break; - - case 0: /* re-enable writes */ + } else { + /* re-enable writes */ lock_all_device_hash_locks_irq(conf); conf->quiesce = 0; wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_overlap); unlock_all_device_hash_locks_irq(conf); - break; } - r5l_quiesce(conf->log, state); + r5l_quiesce(conf->log, quiesce); } static void *raid45_takeover_raid0(struct mddev *mddev, int level) -- GitLab From 3f8e85fbbaa55c9981e27ae9e5182569b393189b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Feb 2018 00:23:05 +0100 Subject: [PATCH 0040/1291] netfilter: don't set F_IFACE on ipv6 fib lookups commit 47b7e7f82802dced3ac73658bf4b77584a63063f upstream. "fib" starts to behave strangely when an ipv6 default route is added - the FIB lookup returns a route using 'oif' in this case. This behaviour was inherited from ip6tables rpfilter so change this as well. Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1221 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/ip6t_rpfilter.c | 4 ---- net/ipv6/netfilter/nft_fib_ipv6.c | 12 ++---------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index b12e61b7b16c..3119e720a6c8 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -48,10 +48,6 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - if ((flags & XT_RPFILTER_LOOSE) == 0) { - fl6.flowi6_oif = dev->ifindex; - lookup_flags |= RT6_LOOKUP_F_IFACE; - } rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 54b5899543ef..fd9a45cbd709 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -182,7 +182,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, } *dest = 0; - again: rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); if (rt->dst.error) goto put_rt_err; @@ -191,15 +190,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev) { - /* multipath route? Try again with F_IFACE */ - if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { - lookup_flags |= RT6_LOOKUP_F_IFACE; - fl6.flowi6_oif = oif->ifindex; - ip6_rt_put(rt); - goto again; - } - } + if (oif && oif != rt->rt6i_idev->dev) + goto put_rt_err; switch (priv->result) { case NFT_FIB_RESULT_OIF: -- GitLab From 5acd64888e020f0ce758fa14e65a75692dcc6e37 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sun, 20 May 2018 13:03:38 +0200 Subject: [PATCH 0041/1291] netfilter: ip6t_rpfilter: provide input interface for route lookup commit cede24d1b21d68d84ac5a36c44f7d37daadcc258 upstream. In commit 47b7e7f82802, this bit was removed at the same time the RT6_LOOKUP_F_IFACE flag was removed. However, it is needed when link-local addresses are used, which is a very common case: when packets are routed, neighbor solicitations are done using link-local addresses. For example, the following neighbor solicitation is not matched by "-m rpfilter": IP6 fe80::5254:33ff:fe00:1 > ff02::1:ff00:3: ICMP6, neighbor solicitation, who has 2001:db8::5254:33ff:fe00:3, length 32 Commit 47b7e7f82802 doesn't quite explain why we shouldn't use RT6_LOOKUP_F_IFACE in the rpfilter case. I suppose the interface check later in the function would make it redundant. However, the remaining of the routing code is using RT6_LOOKUP_F_IFACE when there is no source address (which matches rpfilter's case with a non-unicast destination, like with neighbor solicitation). Signed-off-by: Vincent Bernat Fixes: 47b7e7f82802 ("netfilter: don't set F_IFACE on ipv6 fib lookups") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/ip6t_rpfilter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 3119e720a6c8..1c4a5de3f301 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -48,6 +48,8 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + if ((flags & XT_RPFILTER_LOOSE) == 0) + fl6.flowi6_oif = dev->ifindex; rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) -- GitLab From 259cc05cce439944121a7c26e439713b873af8b2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 11 Jun 2018 22:16:33 +0900 Subject: [PATCH 0042/1291] netfilter: nf_tables: use WARN_ON_ONCE instead of BUG_ON in nft_do_chain() commit adc972c5b88829d38ede08b1069718661c7330ae upstream. When depth of chain is bigger than NFT_JUMP_STACK_SIZE, the nft_do_chain crashes. But there is no need to crash hard here. Suggested-by: Florian Westphal Signed-off-by: Taehee Yoo Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 40e744572283..32b7896929f3 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -208,7 +208,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) switch (regs.verdict.code) { case NFT_JUMP: - BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) + return NF_DROP; jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; -- GitLab From 0ed70f20644959caf01a89b17fccebd19b204d92 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 22 May 2018 19:45:09 +0200 Subject: [PATCH 0043/1291] ARM: dts: imx6q: Use correct SDMA script for SPI5 core commit df07101e1c4a29e820df02f9989a066988b160e6 upstream. According to the reference manual the shp_2_mcu / mcu_2_shp scripts must be used for devices connected through the SPBA. This fixes an issue we saw with DMA transfers. Sometimes the SPI controller RX FIFO was not empty after a DMA transfer and the driver got stuck in the next PIO transfer when it read one word more than expected. commit dd4b487b32a35 ("ARM: dts: imx6: Use correct SDMA script for SPI cores") is fixing the same issue but only for SPI1 - 4. Fixes: 677940258dd8e ("ARM: dts: imx6q: enable dma for ecspi5") Signed-off-by: Sean Nyekjaer Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6q.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index 90a741732f60..4747ede61acd 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -96,7 +96,7 @@ ecspi5: ecspi@02018000 { clocks = <&clks IMX6Q_CLK_ECSPI5>, <&clks IMX6Q_CLK_ECSPI5>; clock-names = "ipg", "per"; - dmas = <&sdma 11 7 1>, <&sdma 12 7 2>; + dmas = <&sdma 11 8 1>, <&sdma 12 8 2>; dma-names = "rx", "tx"; status = "disabled"; }; -- GitLab From 693d06dffb436ae22c899e0ffd7c28064dd8ec32 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 13 Jun 2018 14:32:36 +0530 Subject: [PATCH 0044/1291] mtd: rawnand: fix return value check for bad block status commit e9893e6fa932f42c90c4ac5849fa9aa0f0f00a34 upstream. Positive return value from read_oob() is making false BAD blocks. For some of the NAND controllers, OOB bytes will be protected with ECC and read_oob() will return number of bitflips. If there is any bitflip in ECC protected OOB bytes for BAD block status page, then that block is getting treated as BAD. Fixes: c120e75e0e7d ("mtd: nand: use read_oob() instead of cmdfunc() for bad block check") Cc: Signed-off-by: Abhishek Sahu Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon [backported to 4.14.y] Signed-off-by: Abhishek Sahu Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 528e04f96c13..d410de331854 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -440,7 +440,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs) for (; page < page_end; page++) { res = chip->ecc.read_oob(mtd, chip, page); - if (res) + if (res < 0) return res; bad = chip->oob_poi[chip->badblockpos]; -- GitLab From 4cf1fbcdef7f04605333a1786f619122c135c0fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 12 May 2018 02:49:30 -0700 Subject: [PATCH 0045/1291] xfrm6: avoid potential infinite loop in _decode_session6() [ Upstream commit d9f92772e8ec388d070752ee8f187ef8fa18621f ] syzbot found a way to trigger an infinitie loop by overflowing @offset variable that has been forced to use u16 for some very obscure reason in the past. We probably want to look at NEXTHDR_FRAGMENT handling which looks wrong, in a separate patch. In net-next, we shall try to use skb_header_pointer() instead of pskb_may_pull(). watchdog: BUG: soft lockup - CPU#1 stuck for 134s! [syz-executor738:4553] Modules linked in: irq event stamp: 13885653 hardirqs last enabled at (13885652): [] restore_regs_and_return_to_kernel+0x0/0x2b hardirqs last disabled at (13885653): [] interrupt_entry+0xb5/0xf0 arch/x86/entry/entry_64.S:625 softirqs last enabled at (13614028): [] tun_napi_alloc_frags drivers/net/tun.c:1478 [inline] softirqs last enabled at (13614028): [] tun_get_user+0x1dd9/0x4290 drivers/net/tun.c:1825 softirqs last disabled at (13614032): [] tun_get_user+0x313f/0x4290 drivers/net/tun.c:1942 CPU: 1 PID: 4553 Comm: syz-executor738 Not tainted 4.17.0-rc3+ #40 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:check_kcov_mode kernel/kcov.c:67 [inline] RIP: 0010:__sanitizer_cov_trace_pc+0x20/0x50 kernel/kcov.c:101 RSP: 0018:ffff8801d8cfe250 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffff8801d88a8080 RBX: ffff8801d7389e40 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffffffff868da4ad RDI: ffff8801c8a53277 RBP: ffff8801d8cfe250 R08: ffff8801d88a8080 R09: ffff8801d8cfe3e8 R10: ffffed003b19fc87 R11: ffff8801d8cfe43f R12: ffff8801c8a5327f R13: 0000000000000000 R14: ffff8801c8a4e5fe R15: ffff8801d8cfe3e8 FS: 0000000000d88940(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff600400 CR3: 00000001acab3000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: _decode_session6+0xc1d/0x14f0 net/ipv6/xfrm6_policy.c:150 __xfrm_decode_session+0x71/0x140 net/xfrm/xfrm_policy.c:2368 xfrm_decode_session_reverse include/net/xfrm.h:1213 [inline] icmpv6_route_lookup+0x395/0x6e0 net/ipv6/icmp.c:372 icmp6_send+0x1982/0x2da0 net/ipv6/icmp.c:551 icmpv6_send+0x17a/0x300 net/ipv6/ip6_icmp.c:43 ip6_input_finish+0x14e1/0x1a30 net/ipv6/ip6_input.c:305 NF_HOOK include/linux/netfilter.h:288 [inline] ip6_input+0xe1/0x5e0 net/ipv6/ip6_input.c:327 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x29c/0xa10 net/ipv6/ip6_input.c:71 NF_HOOK include/linux/netfilter.h:288 [inline] ipv6_rcv+0xeb8/0x2040 net/ipv6/ip6_input.c:208 __netif_receive_skb_core+0x2468/0x3650 net/core/dev.c:4646 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4711 netif_receive_skb_internal+0x126/0x7b0 net/core/dev.c:4785 napi_frags_finish net/core/dev.c:5226 [inline] napi_gro_frags+0x631/0xc40 net/core/dev.c:5299 tun_get_user+0x3168/0x4290 drivers/net/tun.c:1951 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1996 call_write_iter include/linux/fs.h:1784 [inline] do_iter_readv_writev+0x859/0xa50 fs/read_write.c:680 do_iter_write+0x185/0x5f0 fs/read_write.c:959 vfs_writev+0x1c7/0x330 fs/read_write.c:1004 do_writev+0x112/0x2f0 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev fs/read_write.c:1109 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: Steffen Klassert Cc: Nicolas Dichtel Reported-by: syzbot+0053c8...@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/xfrm6_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 17e95a0386b3..d6b012295b45 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -123,7 +123,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; const struct ipv6hdr *hdr = ipv6_hdr(skb); - u16 offset = sizeof(*hdr); + u32 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u16 nhoff = IP6CB(skb)->nhoff; -- GitLab From e36bc9930d85f4ae29b67eb71bc66ae49e217b71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 May 2018 21:25:46 +0100 Subject: [PATCH 0046/1291] afs: Fix directory permissions check [ Upstream commit 378831e4daec75fbba6d3612bcf3b4dd00ddbf08 ] Doing faccessat("/afs/some/directory", 0) triggers a BUG in the permissions check code. Fix this by just removing the BUG section. If no permissions are asked for, just return okay if the file exists. Also: (1) Split up the directory check so that it has separate if-statements rather than if-else-if (e.g. checking for MAY_EXEC shouldn't skip the check for MAY_READ and MAY_WRITE). (2) Check for MAY_CHDIR as MAY_EXEC. Without the main fix, the following BUG may occur: kernel BUG at fs/afs/security.c:386! invalid opcode: 0000 [#1] SMP PTI ... RIP: 0010:afs_permission+0x19d/0x1a0 [kafs] ... Call Trace: ? inode_permission+0xbe/0x180 ? do_faccessat+0xdc/0x270 ? do_syscall_64+0x60/0x1f0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 00d3b7a4533e ("[AFS]: Add security support.") Reported-by: Jonathan Billings Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/afs/security.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index faca66227ecf..859096e25f2c 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -323,18 +323,14 @@ int afs_permission(struct inode *inode, int mask) mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); if (S_ISDIR(inode->i_mode)) { - if (mask & MAY_EXEC) { + if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; - } else if (mask & MAY_READ) { - if (!(access & AFS_ACE_LOOKUP)) - goto permission_denied; - } else if (mask & MAY_WRITE) { + } + if (mask & MAY_WRITE) { if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ goto permission_denied; - } else { - BUG(); } } else { if (!(access & AFS_ACE_LOOKUP)) -- GitLab From db73501ebc3ad56b94aa5adb7365a9a4d5313523 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 27 Apr 2018 10:45:31 +0200 Subject: [PATCH 0047/1291] netfilter: ebtables: handle string from userspace with care [ Upstream commit 94c752f99954797da583a84c4907ff19e92550a4 ] strlcpy() can't be safely used on a user-space provided string, as it can try to read beyond the buffer's end, if the latter is not NULL terminated. Leveraging the above, syzbot has been able to trigger the following splat: BUG: KASAN: stack-out-of-bounds in strlcpy include/linux/string.h:300 [inline] BUG: KASAN: stack-out-of-bounds in compat_mtw_from_user net/bridge/netfilter/ebtables.c:1957 [inline] BUG: KASAN: stack-out-of-bounds in ebt_size_mwt net/bridge/netfilter/ebtables.c:2059 [inline] BUG: KASAN: stack-out-of-bounds in size_entry_mwt net/bridge/netfilter/ebtables.c:2155 [inline] BUG: KASAN: stack-out-of-bounds in compat_copy_entries+0x96c/0x14a0 net/bridge/netfilter/ebtables.c:2194 Write of size 33 at addr ffff8801b0abf888 by task syz-executor0/4504 CPU: 0 PID: 4504 Comm: syz-executor0 Not tainted 4.17.0-rc2+ #40 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 memcpy+0x37/0x50 mm/kasan/kasan.c:303 strlcpy include/linux/string.h:300 [inline] compat_mtw_from_user net/bridge/netfilter/ebtables.c:1957 [inline] ebt_size_mwt net/bridge/netfilter/ebtables.c:2059 [inline] size_entry_mwt net/bridge/netfilter/ebtables.c:2155 [inline] compat_copy_entries+0x96c/0x14a0 net/bridge/netfilter/ebtables.c:2194 compat_do_replace+0x483/0x900 net/bridge/netfilter/ebtables.c:2285 compat_do_ebt_set_ctl+0x2ac/0x324 net/bridge/netfilter/ebtables.c:2367 compat_nf_sockopt net/netfilter/nf_sockopt.c:144 [inline] compat_nf_setsockopt+0x9b/0x140 net/netfilter/nf_sockopt.c:156 compat_ip_setsockopt+0xff/0x140 net/ipv4/ip_sockglue.c:1279 inet_csk_compat_setsockopt+0x97/0x120 net/ipv4/inet_connection_sock.c:1041 compat_tcp_setsockopt+0x49/0x80 net/ipv4/tcp.c:2901 compat_sock_common_setsockopt+0xb4/0x150 net/core/sock.c:3050 __compat_sys_setsockopt+0x1ab/0x7c0 net/compat.c:403 __do_compat_sys_setsockopt net/compat.c:416 [inline] __se_compat_sys_setsockopt net/compat.c:413 [inline] __ia32_compat_sys_setsockopt+0xbd/0x150 net/compat.c:413 do_syscall_32_irqs_on arch/x86/entry/common.c:323 [inline] do_fast_syscall_32+0x345/0xf9b arch/x86/entry/common.c:394 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7fb3cb9 RSP: 002b:00000000fff0c26c EFLAGS: 00000282 ORIG_RAX: 000000000000016e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000080 RSI: 0000000020000300 RDI: 00000000000005f4 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The buggy address belongs to the page: page:ffffea0006c2afc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x2fffc0000000000() raw: 02fffc0000000000 0000000000000000 0000000000000000 00000000ffffffff raw: 0000000000000000 ffffea0006c20101 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Fix the issue replacing the unsafe function with strscpy() and taking care of possible errors. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-and-tested-by: syzbot+4e42a04e0bc33cb6c087@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5b8cd359c4c0..e27fb6e97d18 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1950,7 +1950,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, int off, pad = 0; unsigned int size_kern, match_size = mwt->match_size; - strlcpy(name, mwt->u.name, sizeof(name)); + if (strscpy(name, mwt->u.name, sizeof(name)) < 0) + return -EINVAL; if (state->buf_kern_start) dst = state->buf_kern_start + state->buf_kern_offset; -- GitLab From 510e1e8020a8a1b7cf53a84f18a7d03757daba55 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 15 May 2018 14:05:13 +0200 Subject: [PATCH 0048/1291] s390/dasd: use blk_mq_rq_from_pdu for per request data [ Upstream commit f0f59a2fab8e52b9d582b39da39f22230ca80aee ] Dasd uses completion_data from struct request to store per request private data - this is problematic since this member is part of a union which is also used by IO schedulers. Let the block layer maintain space for per request data behind each struct request. Fixes crashes on block layer timeouts like this one: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000001308007 R3:00000000fffc8007 S:00000000fffcc000 P:000000000000013d Oops: 0004 ilc:2 [#1] PREEMPT SMP Modules linked in: [...] CPU: 0 PID: 1480 Comm: kworker/0:2H Not tainted 4.17.0-rc4-00046-gaa3bcd43b5af #203 Hardware name: IBM 3906 M02 702 (LPAR) Workqueue: kblockd blk_mq_timeout_work Krnl PSW : 0000000067ac406b 00000000b6960308 (do_raw_spin_trylock+0x30/0x78) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000c00 0000000000000000 0000000000000000 0000000000000001 0000000000b9d3c8 0000000000000000 0000000000000001 00000000cf9639d8 0000000000000000 0700000000000000 0000000000000000 000000000099f09e 0000000000000000 000000000076e9d0 000000006247bb08 000000006247bae0 Krnl Code: 00000000001c159c: b90400c2 lgr %r12,%r2 00000000001c15a0: a7180000 lhi %r1,0 #00000000001c15a4: 583003a4 l %r3,932 >00000000001c15a8: ba132000 cs %r1,%r3,0(%r2) 00000000001c15ac: a7180001 lhi %r1,1 00000000001c15b0: a784000b brc 8,1c15c6 00000000001c15b4: c0e5004e72aa brasl %r14,b8fb08 00000000001c15ba: 1812 lr %r1,%r2 Call Trace: ([<0700000000000000>] 0x700000000000000) [<0000000000b9d3d2>] _raw_spin_lock_irqsave+0x7a/0xb8 [<000000000099f09e>] dasd_times_out+0x46/0x278 [<000000000076ea6e>] blk_mq_terminate_expired+0x9e/0x108 [<000000000077497a>] bt_for_each+0x102/0x130 [<0000000000774e54>] blk_mq_queue_tag_busy_iter+0x74/0xd8 [<000000000076fea0>] blk_mq_timeout_work+0x260/0x320 [<0000000000169dd4>] process_one_work+0x3bc/0x708 [<000000000016a382>] worker_thread+0x262/0x408 [<00000000001723a8>] kthread+0x160/0x178 [<0000000000b9e73a>] kernel_thread_starter+0x6/0xc [<0000000000b9e734>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000b9d3cc>] _raw_spin_lock_irqsave+0x74/0xb8 Kernel panic - not syncing: Fatal exception: panic_on_oops Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e67c1d8a193d..d072f84a8535 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3049,7 +3049,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, cqr->callback_data = req; cqr->status = DASD_CQR_FILLED; cqr->dq = dq; - req->completion_data = cqr; + *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr; + blk_mq_start_request(req); spin_lock(&block->queue_lock); list_add_tail(&cqr->blocklist, &block->ccw_queue); @@ -3073,12 +3074,13 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, */ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) { - struct dasd_ccw_req *cqr = req->completion_data; struct dasd_block *block = req->q->queuedata; struct dasd_device *device; + struct dasd_ccw_req *cqr; unsigned long flags; int rc = 0; + cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)); if (!cqr) return BLK_EH_NOT_HANDLED; @@ -3184,6 +3186,7 @@ static int dasd_alloc_queue(struct dasd_block *block) int rc; block->tag_set.ops = &dasd_mq_ops; + block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *); block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; -- GitLab From 27aa533f24e9951aff62b3b6a14c1feed8cbf624 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 May 2018 22:58:33 +0200 Subject: [PATCH 0049/1291] netfilter: nft_limit: fix packet ratelimiting [ Upstream commit 3e0f64b7dd3149f75e8652ff1df56cffeedc8fc1 ] Credit calculations for the packet ratelimiting are not correct, as per the applied ratelimit of 25/second and burst 8, a total of 33 packets should have been accepted. This is true in iptables(33) but not in nftables (~65). For packet ratelimiting, use: div_u64(limit->nsecs, limit->rate) * limit->burst; to calculate credit, just like in iptables' xt_limit does. Moreover, use default burst in iptables, users are expecting similar behaviour. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_limit.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index a9fc298ef4c3..72f13a1144dd 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -51,10 +51,13 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) return !limit->invert; } +/* Use same default as in iptables. */ +#define NFT_LIMIT_PKT_BURST_DEFAULT 5 + static int nft_limit_init(struct nft_limit *limit, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], bool pkts) { - u64 unit; + u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) @@ -68,18 +71,25 @@ static int nft_limit_init(struct nft_limit *limit, if (tb[NFTA_LIMIT_BURST]) limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); - else - limit->burst = 0; + + if (pkts && limit->burst == 0) + limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT; if (limit->rate + limit->burst < limit->rate) return -EOVERFLOW; - /* The token bucket size limits the number of tokens can be - * accumulated. tokens_max specifies the bucket size. - * tokens_max = unit * (rate + burst) / rate. - */ - limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), - limit->rate); + if (pkts) { + tokens = div_u64(limit->nsecs, limit->rate) * limit->burst; + } else { + /* The token bucket size limits the number of tokens can be + * accumulated. tokens_max specifies the bucket size. + * tokens_max = unit * (rate + burst) / rate. + */ + tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); + } + + limit->tokens = tokens; limit->tokens_max = limit->tokens; if (tb[NFTA_LIMIT_FLAGS]) { @@ -144,7 +154,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, struct nft_limit_pkts *priv = nft_expr_priv(expr); int err; - err = nft_limit_init(&priv->limit, tb); + err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; @@ -185,7 +195,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx, { struct nft_limit *priv = nft_expr_priv(expr); - return nft_limit_init(priv, tb); + return nft_limit_init(priv, tb, false); } static int nft_limit_bytes_dump(struct sk_buff *skb, @@ -246,7 +256,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx, struct nft_limit_pkts *priv = nft_obj_data(obj); int err; - err = nft_limit_init(&priv->limit, tb); + err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; @@ -289,7 +299,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx, { struct nft_limit *priv = nft_obj_data(obj); - return nft_limit_init(priv, tb); + return nft_limit_init(priv, tb, false); } static int nft_limit_obj_bytes_dump(struct sk_buff *skb, -- GitLab From 4abab5dca7237e89f62069587f64feebc56dc105 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 May 2018 18:22:35 +0300 Subject: [PATCH 0050/1291] ipvs: fix buffer overflow with sync daemon and service [ Upstream commit 52f96757905bbf0edef47f3ee6c7c784e7f8ff8a ] syzkaller reports for buffer overflow for interface name when starting sync daemons [1] What we do is that we copy user structure into larger stack buffer but later we search NUL past the stack buffer. The same happens for sched_name when adding/editing virtual server. We are restricted by IP_VS_SCHEDNAME_MAXLEN and IP_VS_IFNAME_MAXLEN being used as size in include/uapi/linux/ip_vs.h, so they include the space for NUL. As using strlcpy is wrong for unsafe source, replace it with strscpy and add checks to return EINVAL if source string is not NUL-terminated. The incomplete strlcpy fix comes from 2.6.13. For the netlink interface reduce the len parameter for IPVS_DAEMON_ATTR_MCAST_IFN and IPVS_SVC_ATTR_SCHED_NAME, so that we get proper EINVAL. [1] kernel BUG at lib/string.c:1052! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 373 Comm: syz-executor936 Not tainted 4.17.0-rc4+ #45 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:fortify_panic+0x13/0x20 lib/string.c:1051 RSP: 0018:ffff8801c976f800 EFLAGS: 00010282 RAX: 0000000000000022 RBX: 0000000000000040 RCX: 0000000000000000 RDX: 0000000000000022 RSI: ffffffff8160f6f1 RDI: ffffed00392edef6 RBP: ffff8801c976f800 R08: ffff8801cf4c62c0 R09: ffffed003b5e4fb0 R10: ffffed003b5e4fb0 R11: ffff8801daf27d87 R12: ffff8801c976fa20 R13: ffff8801c976fae4 R14: ffff8801c976fae0 R15: 000000000000048b FS: 00007fd99f75e700(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200001c0 CR3: 00000001d6843000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: strlen include/linux/string.h:270 [inline] strlcpy include/linux/string.h:293 [inline] do_ip_vs_set_ctl+0x31c/0x1d00 net/netfilter/ipvs/ip_vs_ctl.c:2388 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x7d/0xd0 net/netfilter/nf_sockopt.c:115 ip_setsockopt+0xd8/0xf0 net/ipv4/ip_sockglue.c:1253 udp_setsockopt+0x62/0xa0 net/ipv4/udp.c:2487 ipv6_setsockopt+0x149/0x170 net/ipv6/ipv6_sockglue.c:917 tcp_setsockopt+0x93/0xe0 net/ipv4/tcp.c:3057 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3046 __sys_setsockopt+0x1bd/0x390 net/socket.c:1903 __do_sys_setsockopt net/socket.c:1914 [inline] __se_sys_setsockopt net/socket.c:1911 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x447369 RSP: 002b:00007fd99f75dda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00000000006e39e4 RCX: 0000000000447369 RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000018 R09: 0000000000000000 R10: 00000000200001c0 R11: 0000000000000246 R12: 00000000006e39e0 R13: 75a1ff93f0896195 R14: 6f745f3168746576 R15: 0000000000000001 Code: 08 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b 48 89 df e8 d2 8f 48 fa eb de 55 48 89 fe 48 c7 c7 60 65 64 88 48 89 e5 e8 91 dd f3 f9 <0f> 0b 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 56 RIP: fortify_panic+0x13/0x20 lib/string.c:1051 RSP: ffff8801c976f800 Reported-and-tested-by: syzbot+aac887f77319868646df@syzkaller.appspotmail.com Fixes: e4ff67513096 ("ipvs: add sync_maxlen parameter for the sync daemon") Fixes: 4da62fc70d7c ("[IPVS]: Fix for overflows") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_ctl.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e8f1556fa446..327ebe786eeb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2384,8 +2384,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ipvs_sync_daemon_cfg cfg; memset(&cfg, 0, sizeof(cfg)); - strlcpy(cfg.mcast_ifn, dm->mcast_ifn, - sizeof(cfg.mcast_ifn)); + ret = -EINVAL; + if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, + sizeof(cfg.mcast_ifn)) <= 0) + goto out_dec; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { @@ -2423,12 +2425,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } } + if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) && + strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) == + IP_VS_SCHEDNAME_MAXLEN) { + ret = -EINVAL; + goto out_unlock; + } + /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && usvc.protocol != IPPROTO_SCTP) { - pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", + pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", usvc.protocol, &usvc.addr.ip, - ntohs(usvc.port), usvc.sched_name); + ntohs(usvc.port)); ret = -EFAULT; goto out_unlock; } @@ -2850,7 +2859,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, - .len = IP_VS_IFNAME_MAXLEN }, + .len = IP_VS_IFNAME_MAXLEN - 1 }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, @@ -2868,7 +2877,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, - .len = IP_VS_SCHEDNAME_MAXLEN }, + .len = IP_VS_SCHEDNAME_MAXLEN - 1 }, [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, .len = IP_VS_PENAME_MAXLEN }, [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, -- GitLab From 3ee6bd9411a60f90b5adeea5440cbac302c3e9d0 Mon Sep 17 00:00:00 2001 From: Hao Wei Tee Date: Tue, 29 May 2018 10:25:17 +0300 Subject: [PATCH 0051/1291] iwlwifi: pcie: compare with number of IRQs requested for, not number of CPUs [ Upstream commit ab1068d6866e28bf6427ceaea681a381e5870a4a ] When there are 16 or more logical CPUs, we request for `IWL_MAX_RX_HW_QUEUES` (16) IRQs only as we limit to that number of IRQs, but later on we compare the number of IRQs returned to nr_online_cpus+2 instead of max_irqs, the latter being what we actually asked for. This ends up setting num_rx_queues to 17 which causes lots of out-of-bounds array accesses later on. Compare to max_irqs instead, and also add an assertion in case num_rx_queues > IWM_MAX_RX_HW_QUEUES. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199551 Fixes: 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX") Signed-off-by: Hao Wei Tee Tested-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 12a9b86d71ea..dffa697d71e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1499,14 +1499,13 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_irqs, num_irqs, i, ret, nr_online_cpus; + int max_irqs, num_irqs, i, ret; u16 pci_cmd; if (!trans->cfg->mq_rx_supported) goto enable_msi; - nr_online_cpus = num_online_cpus(); - max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES); + max_irqs = min_t(u32, num_online_cpus() + 2, IWL_MAX_RX_HW_QUEUES); for (i = 0; i < max_irqs; i++) trans_pcie->msix_entries[i].entry = i; @@ -1532,16 +1531,17 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, * Two interrupts less: non rx causes shared with FBQ and RSS. * More than two interrupts: we will use fewer RSS queues. */ - if (num_irqs <= nr_online_cpus) { + if (num_irqs <= max_irqs - 2) { trans_pcie->trans->num_rx_queues = num_irqs + 1; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | IWL_SHARED_IRQ_FIRST_RSS; - } else if (num_irqs == nr_online_cpus + 1) { + } else if (num_irqs == max_irqs - 1) { trans_pcie->trans->num_rx_queues = num_irqs; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; } else { trans_pcie->trans->num_rx_queues = num_irqs - 1; } + WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES); trans_pcie->alloc_vecs = num_irqs; trans_pcie->msix_enabled = true; -- GitLab From d20dcd2f11357f2b0f52a243e4737c129240475d Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Fri, 25 May 2018 20:49:52 +0300 Subject: [PATCH 0052/1291] atm: zatm: fix memcmp casting [ Upstream commit f9c6442a8f0b1dde9e755eb4ff6fa22bcce4eabc ] memcmp() returns int, but eprom_try_esi() cast it to unsigned char. One can lose significant bits and get 0 from non-0 value returned by the memcmp(). Signed-off-by: Ivan Bornyakov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/atm/zatm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 9c9a22958717..a8d2eb0ceb8d 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1151,8 +1151,8 @@ static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte, } -static unsigned char eprom_try_esi(struct atm_dev *dev, unsigned short cmd, - int offset, int swap) +static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset, + int swap) { unsigned char buf[ZEPROM_SIZE]; struct zatm_dev *zatm_dev; -- GitLab From d689ad5c91af0843aceef7975d7ca9356c84a335 Mon Sep 17 00:00:00 2001 From: Josh Hill Date: Sun, 27 May 2018 20:10:41 -0400 Subject: [PATCH 0053/1291] net: qmi_wwan: Add Netgear Aircard 779S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2415f3bd059fe050eb98aedf93664d000ceb4e92 ] Add support for Netgear Aircard 779S Signed-off-by: Josh Hill Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8e06f308ce44..b23ee948e7c9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1103,6 +1103,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, + {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ -- GitLab From be5af6bec31a5c22f61de1be848c3fd0e6e3a2f8 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 28 May 2018 09:36:57 +0200 Subject: [PATCH 0054/1291] perf test: "Session topology" dumps core on s390 [ Upstream commit d121109100bda84bbbb199dab97f9d56432ab235 ] The "perf test Session topology" entry fails with core dump on s390. The root cause is a NULL pointer dereference in function check_cpu_topology() line 76 (or line 82 without -v). The session->header.env.cpu variable is NULL because on s390 function process_cpu_topology() returns with error: socket_id number is too big. You may need to upgrade the perf tool. and releases the env.cpu variable via zfree() and sets it to NULL. Here is the gdb output: (gdb) n 76 pr_debug("CPU %d, core %d, socket %d\n", i, (gdb) n Program received signal SIGSEGV, Segmentation fault. 0x00000000010f4d9e in check_cpu_topology (path=0x3ffffffd6c8 "/tmp/perf-test-J6CHMa", map=0x14a1740) at tests/topology.c:76 76 pr_debug("CPU %d, core %d, socket %d\n", i, (gdb) Make sure the env.cpu variable is not used when its NULL. Test for NULL pointer and return TEST_SKIP if so. Output before: [root@p23lp27 perf]# ./perf test -F 39 39: Session topology :Segmentation fault (core dumped) [root@p23lp27 perf]# Output after: [root@p23lp27 perf]# ./perf test -vF 39 39: Session topology : --- start --- templ file: /tmp/perf-test-Ajx59D socket_id number is too big.You may need to upgrade the perf tool. ---- end ---- Session topology: Skip [root@p23lp27 perf]# Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180528073657.11743-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/topology.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a59db7c45a65..81ede20f49d7 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -66,6 +66,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map) session = perf_session__new(&file, false, NULL); TEST_ASSERT_VAL("can't get session", session); + /* On platforms with large numbers of CPUs process_cpu_topology() + * might issue an error while reading the perf.data file section + * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member + * cpu is a NULL pointer. + * Example: On s390 + * CPU 0 is on core_id 0 and physical_package_id 6 + * CPU 1 is on core_id 1 and physical_package_id 3 + * + * Core_id and physical_package_id are platform and architecture + * dependend and might have higher numbers than the CPU id. + * This actually depends on the configuration. + * + * In this case process_cpu_topology() prints error message: + * "socket_id number is too big. You may need to upgrade the + * perf tool." + * + * This is the reason why this test might be skipped. + */ + if (!session->header.env.cpu) + return TEST_SKIP; + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -91,7 +112,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe { char path[PATH_MAX]; struct cpu_map *map; - int ret = -1; + int ret = TEST_FAIL; TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); @@ -106,12 +127,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe goto free_path; } - if (check_cpu_topology(path, map)) - goto free_map; - ret = 0; - -free_map: + ret = check_cpu_topology(path, map); cpu_map__put(map); + free_path: unlink(path); return ret; -- GitLab From ae14c044587eceb1775c03e0be9e4e547d262aa2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 11 May 2018 19:21:42 +0800 Subject: [PATCH 0055/1291] perf bpf: Fix NULL return handling in bpf__prepare_load() [ Upstream commit ab4e32ff5aa797eaea551dbb67946e2fcb56cc7e ] bpf_object__open()/bpf_object__open_buffer can return error pointer or NULL, check the return values with IS_ERR_OR_NULL() in bpf__prepare_load and bpf__prepare_load_buffer Signed-off-by: YueHaibing Acked-by: Daniel Borkmann Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: netdev@vger.kernel.org Link: https://lkml.kernel.org/n/tip-psf4xwc09n62al2cb9s33v9h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/bpf-loader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 72c107fcbc5a..c02d2cfd3aea 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) } obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); } @@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) pr_debug("bpf: successfull builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); - if (!IS_ERR(obj) && llvm_param.dump_obj) + if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); free(obj_buf); } else obj = bpf_object__open(filename); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load %s\n", filename); return obj; } -- GitLab From 93b84462eadf0ebbf72abddfcf75d46ac9b59730 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 May 2018 19:43:53 -0700 Subject: [PATCH 0056/1291] fs: clear writeback errors in inode_init_always [ Upstream commit 829bc787c1a0403e4d886296dd4d90c5f9c1744a ] In inode_init_always(), we clear the inode mapping flags, which clears any retained error (AS_EIO, AS_ENOSPC) bits. Unfortunately, we do not also clear wb_err, which means that old mapping errors can leak through to new inodes. This is crucial for the XFS inode allocation path because we recycle old in-core inodes and we do not want error state from an old file to leak into the new file. This bug was discovered by running generic/036 and generic/047 in a loop and noticing that the EIOs generated by the collision of direct and buffered writes in generic/036 would survive the remount between 036 and 047, and get reported to the fsyncs (on different files!) in generic/047. Signed-off-by: Darrick J. Wong Reviewed-by: Jeff Layton Reviewed-by: Brian Foster Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/inode.c b/fs/inode.c index d1e35b53bb23..e07b3e1f5970 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -177,6 +177,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; -- GitLab From e4c55e0e6a754d21ea3d2e528e384b546192b9a1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Jul 2017 18:58:21 +0200 Subject: [PATCH 0057/1291] sched/core: Fix rules for running on online && !active CPUs [ Upstream commit 175f0e25abeaa2218d431141ce19cf1de70fa82d ] As already enforced by the WARN() in __set_cpus_allowed_ptr(), the rules for running on an online && !active CPU are stricter than just being a kthread, you need to be a per-cpu kthread. If you're not strictly per-CPU, you have better CPUs to run on and don't need the partially booted one to get your work done. The exception is to allow smpboot threads to bootstrap the CPU itself and get kernel 'services' initialized before we allow userspace on it. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleixner Fixes: 955dbdf4ce87 ("sched: Allow migrating kthreads into online but inactive CPUs") Link: http://lkml.kernel.org/r/20170725165821.cejhb7v2s3kecems@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f287dcbe8cb2..002b56d2c9eb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -894,6 +894,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) } #ifdef CONFIG_SMP + +static inline bool is_per_cpu_kthread(struct task_struct *p) +{ + if (!(p->flags & PF_KTHREAD)) + return false; + + if (p->nr_cpus_allowed != 1) + return false; + + return true; +} + +/* + * Per-CPU kthreads are allowed to run on !actie && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + return false; + + if (is_per_cpu_kthread(p)) + return cpu_online(cpu); + + return cpu_active(cpu); +} + /* * This is how migration works: * @@ -951,16 +978,8 @@ struct migration_arg { static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, struct task_struct *p, int dest_cpu) { - if (p->flags & PF_KTHREAD) { - if (unlikely(!cpu_online(dest_cpu))) - return rq; - } else { - if (unlikely(!cpu_active(dest_cpu))) - return rq; - } - /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!is_cpu_allowed(p, dest_cpu)) return rq; update_rq_clock(rq); @@ -1489,10 +1508,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for (;;) { /* Any allowed, online CPU? */ for_each_cpu(dest_cpu, &p->cpus_allowed) { - if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) - continue; - if (!cpu_online(dest_cpu)) + if (!is_cpu_allowed(p, dest_cpu)) continue; + goto out; } -- GitLab From 0d5e04e239ad5b18c4099ef942843bf510af1122 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 26 May 2018 08:46:47 -0700 Subject: [PATCH 0058/1291] sched/core: Require cpu_active() in select_task_rq(), for user tasks [ Upstream commit 7af443ee1697607541c6346c87385adab2214743 ] select_task_rq() is used in a few paths to select the CPU upon which a thread should be run - for example it is used by try_to_wake_up() & by fork or exec balancing. As-is it allows use of any online CPU that is present in the task's cpus_allowed mask. This presents a problem because there is a period whilst CPUs are brought online where a CPU is marked online, but is not yet fully initialized - ie. the period where CPUHP_AP_ONLINE_IDLE <= state < CPUHP_ONLINE. Usually we don't run any user tasks during this window, but there are corner cases where this can happen. An example observed is: - Some user task A, running on CPU X, forks to create task B. - sched_fork() calls __set_task_cpu() with cpu=X, setting task B's task_struct::cpu field to X. - CPU X is offlined. - Task A, currently somewhere between the __set_task_cpu() in copy_process() and the call to wake_up_new_task(), is migrated to CPU Y by migrate_tasks() when CPU X is offlined. - CPU X is onlined, but still in the CPUHP_AP_ONLINE_IDLE state. The scheduler is now active on CPU X, but there are no user tasks on the runqueue. - Task A runs on CPU Y & reaches wake_up_new_task(). This calls select_task_rq() with cpu=X, taken from task B's task_struct, and select_task_rq() allows CPU X to be returned. - Task A enqueues task B on CPU X's runqueue, via activate_task() & enqueue_task(). - CPU X now has a user task on its runqueue before it has reached the CPUHP_ONLINE state. In most cases, the user tasks that schedule on the newly onlined CPU have no idea that anything went wrong, but one case observed to be problematic is if the task goes on to invoke the sched_setaffinity syscall. The newly onlined CPU reaches the CPUHP_AP_ONLINE_IDLE state before the CPU that brought it online calls stop_machine_unpark(). This means that for a portion of the window of time between CPUHP_AP_ONLINE_IDLE & CPUHP_ONLINE the newly onlined CPU's struct cpu_stopper has its enabled field set to false. If a user thread is executed on the CPU during this window and it invokes sched_setaffinity with a CPU mask that does not include the CPU it's running on, then when __set_cpus_allowed_ptr() calls stop_one_cpu() intending to invoke migration_cpu_stop() and perform the actual migration away from the CPU it will simply return -ENOENT rather than calling migration_cpu_stop(). We then return from the sched_setaffinity syscall back to the user task that is now running on a CPU which it just asked not to run on, and which is not present in its cpus_allowed mask. This patch resolves the problem by having select_task_rq() enforce that user tasks run on CPUs that are active - the same requirement that select_fallback_rq() already enforces. This should ensure that newly onlined CPUs reach the CPUHP_AP_ACTIVE state before being able to schedule user tasks, and also implies that bringup_wait_for_ap() will have called stop_machine_unpark() which resolves the sched_setaffinity issue above. I haven't yet investigated them, but it may be of interest to review whether any of the actions performed by hotplug states between CPUHP_AP_ONLINE_IDLE & CPUHP_AP_ACTIVE could have similar unintended effects on user tasks that might schedule before they are reached, which might widen the scope of the problem from just affecting the behaviour of sched_setaffinity. Signed-off-by: Paul Burton Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180526154648.11635-2-paul.burton@mips.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 002b56d2c9eb..31615d1ae44c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1573,8 +1573,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || - !cpu_online(cpu))) + if (unlikely(!is_cpu_allowed(p, cpu))) cpu = select_fallback_rq(task_cpu(p), p); return cpu; -- GitLab From 4888ced6b749aa51355b4614af25e2eec0818f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Tue, 22 May 2018 14:30:15 -0700 Subject: [PATCH 0059/1291] platform/x86: asus-wmi: Fix NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 32ffd6e8d1f6cef94bedca15dfcdebdeb590499d ] Do not perform the rfkill cleanup routine when (asus->driver->wlan_ctrl_by_user && ashs_present()) is true, since nothing is registered with the rfkill subsystem in that case. Doing so leads to the following kernel NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __mutex_lock_slowpath+0x98/0x120 PGD 1a3aa8067 PUD 1a3b3d067 PMD 0 Oops: 0002 [#1] PREEMPT SMP Modules linked in: bnep ccm binfmt_misc uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core hid_a4tech videodev x86_pkg_temp_thermal intel_powerclamp coretemp ath3k btusb btrtl btintel bluetooth kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic irqbypass crc32c_intel arc4 i915 snd_hda_intel snd_hda_codec ath9k ath9k_common ath9k_hw ath i2c_algo_bit snd_hwdep mac80211 ghash_clmulni_intel snd_hda_core snd_pcm snd_timer cfg80211 ehci_pci xhci_pci drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm xhci_hcd ehci_hcd asus_nb_wmi(-) asus_wmi sparse_keymap r8169 rfkill mxm_wmi serio_raw snd mii mei_me lpc_ich i2c_i801 video soundcore mei i2c_smbus wmi i2c_core mfd_core CPU: 3 PID: 3275 Comm: modprobe Not tainted 4.9.34-gentoo #34 Hardware name: ASUSTeK COMPUTER INC. K56CM/K56CM, BIOS K56CM.206 08/21/2012 task: ffff8801a639ba00 task.stack: ffffc900014cc000 RIP: 0010:[] [] __mutex_lock_slowpath+0x98/0x120 RSP: 0018:ffffc900014cfce0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8801a54315b0 RCX: 00000000c0000100 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8801a54315b4 RBP: ffffc900014cfd30 R08: 0000000000000000 R09: 0000000000000002 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801a54315b4 R13: ffff8801a639ba00 R14: 00000000ffffffff R15: ffff8801a54315b8 FS: 00007faa254fb700(0000) GS:ffff8801aef80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001a3b1b000 CR4: 00000000001406e0 Stack: ffff8801a54315b8 0000000000000000 ffffffff814733ae ffffc900014cfd28 ffffffff8146a28c ffff8801a54315b0 0000000000000000 ffff8801a54315b0 ffff8801a66f3820 0000000000000000 ffffc900014cfd48 ffffffff816c73e7 Call Trace: [] ? acpi_ut_release_mutex+0x5d/0x61 [] ? acpi_ns_get_node+0x49/0x52 [] mutex_lock+0x17/0x30 [] asus_rfkill_hotplug+0x24/0x1a0 [asus_wmi] [] asus_wmi_rfkill_exit+0x61/0x150 [asus_wmi] [] asus_wmi_remove+0x61/0xb0 [asus_wmi] [] platform_drv_remove+0x28/0x40 [] __device_release_driver+0xa1/0x160 [] device_release_driver+0x23/0x30 [] bus_remove_device+0xfd/0x170 [] device_del+0x139/0x270 [] platform_device_del+0x28/0x90 [] platform_device_unregister+0x12/0x30 [] asus_wmi_unregister_driver+0x19/0x30 [asus_wmi] [] asus_nb_wmi_exit+0x10/0xf26 [asus_nb_wmi] [] SyS_delete_module+0x192/0x270 [] ? exit_to_usermode_loop+0x92/0xa0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Code: e8 5e 30 00 00 8b 03 83 f8 01 0f 84 93 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 41 be ff ff ff ff 4c 89 3c 24 48 89 44 24 08 <48> 89 20 4c 89 6c 24 10 eb 1d 4c 89 e7 49 c7 45 08 02 00 00 00 RIP [] __mutex_lock_slowpath+0x98/0x120 RSP CR2: 0000000000000000 ---[ end trace 8d484233fa7cb512 ]--- note: modprobe[3275] exited with preempt_count 2 https://bugzilla.kernel.org/show_bug.cgi?id=196467 Reported-by: red.f0xyz@gmail.com Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-wmi.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 48e1541dc8d4..7440f650e81a 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -161,6 +161,16 @@ MODULE_LICENSE("GPL"); static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + struct bios_args { u32 arg0; u32 arg1; @@ -962,6 +972,9 @@ static int asus_new_rfkill(struct asus_wmi *asus, static void asus_wmi_rfkill_exit(struct asus_wmi *asus) { + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + return; + asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7"); @@ -2058,16 +2071,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } -static bool ashs_present(void) -{ - int i = 0; - while (ashs_ids[i]) { - if (acpi_dev_found(ashs_ids[i++])) - return true; - } - return false; -} - /* * WMI Driver */ -- GitLab From 28b64cc7a8462cdfffc171ca583179f6c1c36b65 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 30 May 2018 13:03:51 +1000 Subject: [PATCH 0060/1291] net/sonic: Use dma_mapping_error() [ Upstream commit 26de0b76d9ba3200f09c6cb9d9618bda338be5f7 ] With CONFIG_DMA_API_DEBUG=y, calling sonic_open() produces the message, "DMA-API: device driver failed to check map error". Add the missing dma_mapping_error() call. Cc: Thomas Bogendoerfer Signed-off-by: Finn Thain Acked-by: Thomas Bogendoerfer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/natsemi/sonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 612c7a44b26c..23821540ab07 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -71,7 +71,7 @@ static int sonic_open(struct net_device *dev) for (i = 0; i < SONIC_NUM_RRS; i++) { dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE), SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!laddr) { + if (dma_mapping_error(lp->device, laddr)) { while(i > 0) { /* free any that were mapped successfully */ i--; dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE); -- GitLab From 88b01cac4add369bf3f0401a1da7a9bd6bf19305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damien=20Th=C3=A9bault?= Date: Thu, 31 May 2018 07:04:01 +0000 Subject: [PATCH 0061/1291] net: dsa: b53: Add BCM5389 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a95691bc54af1ac4b12c354f91e9cabf1cb068df ] This patch adds support for the BCM5389 switch connected through MDIO. Signed-off-by: Damien Thébault Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/dsa/b53.txt | 1 + drivers/net/dsa/b53/b53_common.c | 13 +++++++++++++ drivers/net/dsa/b53/b53_mdio.c | 5 ++++- drivers/net/dsa/b53/b53_priv.h | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt index 8acf51a4dfa8..47a6a7fe0b86 100644 --- a/Documentation/devicetree/bindings/net/dsa/b53.txt +++ b/Documentation/devicetree/bindings/net/dsa/b53.txt @@ -10,6 +10,7 @@ Required properties: "brcm,bcm53128" "brcm,bcm5365" "brcm,bcm5395" + "brcm,bcm5389" "brcm,bcm5397" "brcm,bcm5398" diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 274f3679f33d..acf64d4cd94c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1549,6 +1549,18 @@ static const struct b53_chip_data b53_switch_chips[] = { .cpu_port = B53_CPU_PORT_25, .duplex_reg = B53_DUPLEX_STAT_FE, }, + { + .chip_id = BCM5389_DEVICE_ID, + .dev_name = "BCM5389", + .vlans = 4096, + .enabled_ports = 0x1f, + .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, { .chip_id = BCM5395_DEVICE_ID, .dev_name = "BCM5395", @@ -1872,6 +1884,7 @@ int b53_switch_detect(struct b53_device *dev) else dev->chip_id = BCM5365_DEVICE_ID; break; + case BCM5389_DEVICE_ID: case BCM5395_DEVICE_ID: case BCM5397_DEVICE_ID: case BCM5398_DEVICE_ID: diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index fa7556f5d4fb..a533a90e3904 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -285,6 +285,7 @@ static const struct b53_io_ops b53_mdio_ops = { #define B53_BRCM_OUI_1 0x0143bc00 #define B53_BRCM_OUI_2 0x03625c00 #define B53_BRCM_OUI_3 0x00406000 +#define B53_BRCM_OUI_4 0x01410c00 static int b53_mdio_probe(struct mdio_device *mdiodev) { @@ -311,7 +312,8 @@ static int b53_mdio_probe(struct mdio_device *mdiodev) */ if ((phy_id & 0xfffffc00) != B53_BRCM_OUI_1 && (phy_id & 0xfffffc00) != B53_BRCM_OUI_2 && - (phy_id & 0xfffffc00) != B53_BRCM_OUI_3) { + (phy_id & 0xfffffc00) != B53_BRCM_OUI_3 && + (phy_id & 0xfffffc00) != B53_BRCM_OUI_4) { dev_err(&mdiodev->dev, "Unsupported device: 0x%08x\n", phy_id); return -ENODEV; } @@ -360,6 +362,7 @@ static const struct of_device_id b53_of_match[] = { { .compatible = "brcm,bcm53125" }, { .compatible = "brcm,bcm53128" }, { .compatible = "brcm,bcm5365" }, + { .compatible = "brcm,bcm5389" }, { .compatible = "brcm,bcm5395" }, { .compatible = "brcm,bcm5397" }, { .compatible = "brcm,bcm5398" }, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 01bd8cbe9a3f..6b9e39ddaec1 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -48,6 +48,7 @@ struct b53_io_ops { enum { BCM5325_DEVICE_ID = 0x25, BCM5365_DEVICE_ID = 0x65, + BCM5389_DEVICE_ID = 0x89, BCM5395_DEVICE_ID = 0x95, BCM5397_DEVICE_ID = 0x97, BCM5398_DEVICE_ID = 0x98, -- GitLab From 5893f4c3fb784f48c020d2637c129a45da7be39e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 8 Jul 2018 15:30:53 +0200 Subject: [PATCH 0062/1291] Linux 4.14.54 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fb66998408f4..de0955d8dfa3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 53 +SUBLEVEL = 54 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 6fe74fb8af8949e91b67fc3fc244c8db0e2e276d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 3 Jul 2018 17:02:39 -0700 Subject: [PATCH 0063/1291] userfaultfd: hugetlbfs: fix userfaultfd_huge_must_wait() pte access commit 1e2c043628c7736dd56536d16c0ce009bc834ae7 upstream. Use huge_ptep_get() to translate huge ptes to normal ptes so we can check them with the huge_pte_* functions. Otherwise some architectures will check the wrong values and will not wait for userspace to bring in the memory. Link: http://lkml.kernel.org/r/20180626132421.78084-1-frankja@linux.ibm.com Fixes: 369cd2121be4 ("userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges") Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5aa392eae1c3..f6ed92524a03 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -220,24 +220,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, unsigned long reason) { struct mm_struct *mm = ctx->mm; - pte_t *pte; + pte_t *ptep, pte; bool ret = true; VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); - if (!pte) + ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); + + if (!ptep) goto out; ret = false; + pte = huge_ptep_get(ptep); /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ - if (huge_pte_none(*pte)) + if (huge_pte_none(pte)) ret = true; - if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP)) + if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) ret = true; out: return ret; -- GitLab From 48b019a51ab8836dcea2d5e68e07fa129b604c3a Mon Sep 17 00:00:00 2001 From: Cannon Matthews Date: Tue, 3 Jul 2018 17:02:43 -0700 Subject: [PATCH 0064/1291] mm: hugetlb: yield when prepping struct pages commit 520495fe96d74e05db585fc748351e0504d8f40d upstream. When booting with very large numbers of gigantic (i.e. 1G) pages, the operations in the loop of gather_bootmem_prealloc, and specifically prep_compound_gigantic_page, takes a very long time, and can cause a softlockup if enough pages are requested at boot. For example booting with 3844 1G pages requires prepping (set_compound_head, init the count) over 1 billion 4K tail pages, which takes considerable time. Add a cond_resched() to the outer loop in gather_bootmem_prealloc() to prevent this lockup. Tested: Booted with softlockup_panic=1 hugepagesz=1G hugepages=3844 and no softlockup is reported, and the hugepages are reported as successfully setup. Link: http://lkml.kernel.org/r/20180627214447.260804-1-cannonmatthews@google.com Signed-off-by: Cannon Matthews Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Andres Lagar-Cavilla Cc: Peter Feiner Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b1f841a9edd4..dfd2947e046e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2159,6 +2159,7 @@ static void __init gather_bootmem_prealloc(void) */ if (hstate_is_gigantic(h)) adjust_managed_page_count(page, 1 << h->order); + cond_resched(); } } -- GitLab From 54f1da1ff0347182ca58c6d1f64551afb36ecb90 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 31 Jan 2018 23:48:49 +0800 Subject: [PATCH 0065/1291] tracing: Fix missing return symbol in function_graph output commit 1fe4293f4b8de75824935f8d8e9a99c7fc6873da upstream. The function_graph tracer does not show the interrupt return marker for the leaf entry. On leaf entries, we see an unbalanced interrupt marker (the interrupt was entered, but nevern left). Before: 1) | SyS_write() { 1) | __fdget_pos() { 1) 0.061 us | __fget_light(); 1) 0.289 us | } 1) | vfs_write() { 1) 0.049 us | rw_verify_area(); 1) + 15.424 us | __vfs_write(); 1) ==========> | 1) 6.003 us | smp_apic_timer_interrupt(); 1) 0.055 us | __fsnotify_parent(); 1) 0.073 us | fsnotify(); 1) + 23.665 us | } 1) + 24.501 us | } After: 0) | SyS_write() { 0) | __fdget_pos() { 0) 0.052 us | __fget_light(); 0) 0.328 us | } 0) | vfs_write() { 0) 0.057 us | rw_verify_area(); 0) | __vfs_write() { 0) ==========> | 0) 8.548 us | smp_apic_timer_interrupt(); 0) <========== | 0) + 36.507 us | } /* __vfs_write */ 0) 0.049 us | __fsnotify_parent(); 0) 0.066 us | fsnotify(); 0) + 50.064 us | } 0) + 50.952 us | } Link: http://lkml.kernel.org/r/1517413729-20411-1-git-send-email-changbin.du@intel.com Cc: stable@vger.kernel.org Fixes: f8b755ac8e0cc ("tracing/function-graph-tracer: Output arrows signal on hardirq call/return") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_functions_graph.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 23c0b0cb5fb9..169b3c44ee97 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -831,6 +831,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ret *graph_ret; struct ftrace_graph_ent *call; unsigned long long duration; + int cpu = iter->cpu; int i; graph_ret = &ret_entry->ret; @@ -839,7 +840,6 @@ print_graph_entry_leaf(struct trace_iterator *iter, if (data) { struct fgraph_cpu_data *cpu_data; - int cpu = iter->cpu; cpu_data = per_cpu_ptr(data->cpu_data, cpu); @@ -869,6 +869,9 @@ print_graph_entry_leaf(struct trace_iterator *iter, trace_seq_printf(s, "%ps();\n", (void *)call->func); + print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, + cpu, iter->ent->pid, flags); + return trace_handle_return(s); } -- GitLab From 6e51bfa950864343cfe210a75268e826a2b4b2e8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 16:25:44 +0200 Subject: [PATCH 0066/1291] scsi: sg: mitigate read/write abuse commit 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd upstream. As Al Viro noted in commit 128394eff343 ("sg_write()/bsg_write() is not fit to be called under KERNEL_DS"), sg improperly accesses userspace memory outside the provided buffer, permitting kernel memory corruption via splice(). But it doesn't just do it on ->write(), also on ->read(). As a band-aid, make sure that the ->read() and ->write() handlers can not be called in weird contexts (kernel context or credentials different from file opener), like for ib_safe_file_access(). If someone needs to use these interfaces from different security contexts, a new interface should be written that goes through the ->ioctl() handler. I've mostly copypasted ib_safe_file_access() over as sg_safe_file_access() because I couldn't find a good common header - please tell me if you know a better way. [mkp: s/_safe_/_check_/] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Jann Horn Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 17a4cc138b00..4d49fb8f2bbc 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -51,6 +51,7 @@ static int sg_version_num = 30536; /* 2 digits for each component */ #include #include #include +#include /* for sg_check_file_access() */ #include "scsi.h" #include @@ -210,6 +211,33 @@ static void sg_device_destroy(struct kref *kref); sdev_prefix_printk(prefix, (sdp)->device, \ (sdp)->disk->disk_name, fmt, ##a) +/* + * The SCSI interfaces that use read() and write() as an asynchronous variant of + * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways + * to trigger read() and write() calls from various contexts with elevated + * privileges. This can lead to kernel memory corruption (e.g. if these + * interfaces are called through splice()) and privilege escalation inside + * userspace (e.g. if a process with access to such a device passes a file + * descriptor to a SUID binary as stdin/stdout/stderr). + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static int sg_check_file_access(struct file *filp, const char *caller) +{ + if (filp->f_cred != current_real_cred()) { + pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + caller, task_tgid_vnr(current), current->comm); + return -EPERM; + } + if (uaccess_kernel()) { + pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n", + caller, task_tgid_vnr(current), current->comm); + return -EACCES; + } + return 0; +} + static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = filp->private_data; @@ -394,6 +422,14 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) struct sg_header *old_hdr = NULL; int retval = 0; + /* + * This could cause a response to be stranded. Close the associated + * file descriptor to free up any resources being held. + */ + retval = sg_check_file_access(filp, __func__); + if (retval) + return retval; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, @@ -581,9 +617,11 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) struct sg_header old_hdr; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + int retval; - if (unlikely(uaccess_kernel())) - return -EINVAL; + retval = sg_check_file_access(filp, __func__); + if (retval) + return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; -- GitLab From e6cf7e68728560f1466046a85c4ae5c685c7b651 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 19 Jun 2018 17:58:24 +0200 Subject: [PATCH 0067/1291] scsi: target: Fix truncated PR-in ReadKeys response commit 63ce3c384db26494615e3c8972bcd419ed71f4c4 upstream. SPC5r17 states that the contents of the ADDITIONAL LENGTH field are not altered based on the allocation length, so always calculate and pack the full key list length even if the list itself is truncated. According to Maged: Yes it fixes the "Storage Spaces Persistent Reservation" test in the Windows 2016 Server Failover Cluster validation suites when having many connections that result in more than 8 registrations. I tested your patch on 4.17 with iblock. This behaviour can be tested using the libiscsi PrinReadKeys.Truncate test. Cc: stable@vger.kernel.org Signed-off-by: David Disseldorp Reviewed-by: Mike Christie Tested-by: Maged Mokhtar Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 4ba5004a069e..fd6ce9996488 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -3729,11 +3729,16 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd) * Check for overflow of 8byte PRI READ_KEYS payload and * next reservation key list descriptor. */ - if ((add_len + 8) > (cmd->data_length - 8)) - break; - - put_unaligned_be64(pr_reg->pr_res_key, &buf[off]); - off += 8; + if (off + 8 <= cmd->data_length) { + put_unaligned_be64(pr_reg->pr_res_key, &buf[off]); + off += 8; + } + /* + * SPC5r17: 6.16.2 READ KEYS service action + * The ADDITIONAL LENGTH field indicates the number of bytes in + * the Reservation key list. The contents of the ADDITIONAL + * LENGTH field are not altered based on the allocation length + */ add_len += 8; } spin_unlock(&dev->t10_pr.registration_lock); -- GitLab From 2b6eff5923ce501d69e58807a1f81d910c029c8f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 21 Jun 2018 14:49:38 +0200 Subject: [PATCH 0068/1291] s390: Correct register corruption in critical section cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 891f6a726cacbb87e5b06076693ffab53bd378d7 upstream. In the critical section cleanup we must not mess with r1. For march=z9 or older, larl + ex (instead of exrl) are used with r1 as a temporary register. This can clobber r1 in several interrupt handlers. Fix this by using r11 as a temp register. r11 is being saved by all callers of cleanup_critical. Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header") Cc: stable@vger.kernel.org #v4.16 Reported-by: Oliver Kurz Reported-by: Petr Tesařík Signed-off-by: Christian Borntraeger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index be20b1f73384..e928c2af6a10 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1244,7 +1244,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_EX %r14 +0: BR_EX %r14,%r11 .align 8 .Lcleanup_table: @@ -1280,7 +1280,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14 + BR_EX %r14,%r11 #endif .Lcleanup_system_call: -- GitLab From 0ce6c46463715cecf78e6b92381f05dd47724f65 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 25 Jun 2018 11:39:52 +0200 Subject: [PATCH 0069/1291] drbd: fix access after free commit 64dafbc9530c10300acffc57fae3269d95fa8f93 upstream. We have struct drbd_requests { ... struct bio *private_bio; ... } to hold a bio clone for local submission. On local IO completion, we put that bio, and in case we want to use the result later, we overload that member to hold the ERR_PTR() of the completion result, Which, before v4.3, used to be the passed in "int error", so we could first bio_put(), then assign. v4.3-rc1~100^2~21 4246a0b63bd8 block: add a bi_error field to struct bio changed that: bio_put(req->private_bio); - req->private_bio = ERR_PTR(error); + req->private_bio = ERR_PTR(bio->bi_error); Which introduces an access after free, because it was non obvious that req->private_bio == bio. Impact of that was mostly unnoticable, because we only use that value in a multiple-failure case, and even then map any "unexpected" error code to EIO, so worst case we could potentially mask a more specific error with EIO in a multiple failure case. Unless the pointed to memory region was unmapped, as is the case with CONFIG_DEBUG_PAGEALLOC, in which case this results in BUG: unable to handle kernel paging request v4.13-rc1~70^2~75 4e4cbee93d56 block: switch bios to blk_status_t changes it further to bio_put(req->private_bio); req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); And blk_status_to_errno() now contains a WARN_ON_ONCE() for unexpected values, which catches this "sometimes", if the memory has been reused quickly enough for other things. Should also go into stable since 4.3, with the trivial change around 4.13. Cc: stable@vger.kernel.org Fixes: 4246a0b63bd8 block: add a bi_error field to struct bio Reported-by: Sarah Newman Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_worker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 03471b3fce86..c2042f822b03 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -282,8 +282,8 @@ void drbd_request_endio(struct bio *bio) what = COMPLETED_OK; } - bio_put(req->private_bio); req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); + bio_put(bio); /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); -- GitLab From 5d8ddc819c848a6009aed5a560e8d53f7cbafa08 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 29 Jun 2018 11:31:50 -0600 Subject: [PATCH 0070/1291] vfio: Use get_user_pages_longterm correctly commit bb94b55af3461e26b32f0e23d455abeae0cfca5d upstream. The patch noted in the fixes below converted get_user_pages_fast() to get_user_pages_longterm(), however the two calls differ in a few ways. First _fast() is documented to not require the mmap_sem, while _longterm() is documented to need it. Hold the mmap sem as required. Second, _fast accepts an 'int write' while _longterm uses 'unsigned int gup_flags', so the expression '!!(prot & IOMMU_WRITE)' is only working by luck as FOLL_WRITE is currently == 0x1. Use the expected FOLL_WRITE constant instead. Fixes: 94db151dc892 ("vfio: disable filesystem-dax page pinning") Cc: Signed-off-by: Jason Gunthorpe Acked-by: Dan Williams Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index fb4e6a7ee521..d639378e36ac 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -339,18 +339,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, struct page *page[1]; struct vm_area_struct *vma; struct vm_area_struct *vmas[1]; + unsigned int flags = 0; int ret; + if (prot & IOMMU_WRITE) + flags |= FOLL_WRITE; + + down_read(&mm->mmap_sem); if (mm == current->mm) { - ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE), - page, vmas); + ret = get_user_pages_longterm(vaddr, 1, flags, page, vmas); } else { - unsigned int flags = 0; - - if (prot & IOMMU_WRITE) - flags |= FOLL_WRITE; - - down_read(&mm->mmap_sem); ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page, vmas, NULL); /* @@ -364,8 +362,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, ret = -EOPNOTSUPP; put_page(page[0]); } - up_read(&mm->mmap_sem); } + up_read(&mm->mmap_sem); if (ret == 1) { *pfn = page_to_pfn(page[0]); -- GitLab From ff533735af1d31c621d1f4297cd3f6f01673b74b Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Mon, 25 Jun 2018 14:05:25 +0200 Subject: [PATCH 0071/1291] cifs: Fix use after free of a mid_q_entry commit 696e420bb2a6624478105651d5368d45b502b324 upstream. With protocol version 2.0 mounts we have seen crashes with corrupt mid entries. Either the server->pending_mid_q list becomes corrupt with a cyclic reference in one element or a mid object fetched by the demultiplexer thread becomes overwritten during use. Code review identified a race between the demultiplexer thread and the request issuing thread. The demultiplexer thread seems to be written with the assumption that it is the sole user of the mid object until it calls the mid callback which either wakes the issuer task or deletes the mid. This assumption is not true because the issuer task can be woken up earlier by a signal. If the demultiplexer thread has proceeded as far as setting the mid_state to MID_RESPONSE_RECEIVED then the issuer thread will happily end up calling cifs_delete_mid while the demultiplexer thread still is using the mid object. Inserting a delay in the cifs demultiplexer thread widens the race window and makes reproduction of the race very easy: if (server->large_buf) buf = server->bigbuf; + usleep_range(500, 4000); server->lstrp = jiffies; To resolve this I think the proper solution involves putting a reference count on the mid object. This patch makes sure that the demultiplexer thread holds a reference until it has finished processing the transaction. Cc: stable@vger.kernel.org Signed-off-by: Lars Persson Acked-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 1 + fs/cifs/cifsproto.h | 1 + fs/cifs/connect.c | 8 +++++++- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 1 + fs/cifs/smb2transport.c | 1 + fs/cifs/transport.c | 18 +++++++++++++++++- 7 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 33d6eb58ce34..f29cdb1cdeb7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1340,6 +1340,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server, /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ + struct kref refcount; struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u32 pid; /* process id */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 762d513a5087..ccdb42f71b2e 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -76,6 +76,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern void cifs_delete_mid(struct mid_q_entry *mid); +extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f7db2fedfa8c..fd24c72bd2cd 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -889,6 +889,7 @@ cifs_demultiplex_thread(void *p) continue; server->total_read += length; + mid_entry = NULL; if (server->ops->is_transform_hdr && server->ops->receive_transform && server->ops->is_transform_hdr(buf)) { @@ -903,8 +904,11 @@ cifs_demultiplex_thread(void *p) length = mid_entry->receive(server, mid_entry); } - if (length < 0) + if (length < 0) { + if (mid_entry) + cifs_mid_q_entry_release(mid_entry); continue; + } if (server->large_buf) buf = server->bigbuf; @@ -920,6 +924,8 @@ cifs_demultiplex_thread(void *p) if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); + + cifs_mid_q_entry_release(mid_entry); } else if (server->ops->is_oplock_break && server->ops->is_oplock_break(buf, server)) { cifs_dbg(FYI, "Received oplock break\n"); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index a723df3e0197..d8cd82001c1c 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -105,6 +105,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 36bc9a7eb8ea..51d69473dbd9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -202,6 +202,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index bf49cb73b9e6..a41fc4a63a59 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -548,6 +548,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = le64_to_cpu(shdr->MessageId); temp->pid = current->pid; temp->command = shdr->Command; /* Always LE */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 7efbab013957..a10f51dfa7f5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -56,6 +56,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -77,6 +78,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } +static void _cifs_mid_q_entry_release(struct kref *refcount) +{ + struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, + refcount); + + mempool_free(mid, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + void DeleteMidQEntry(struct mid_q_entry *midEntry) { @@ -105,7 +121,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif - mempool_free(midEntry, cifs_mid_poolp); + cifs_mid_q_entry_release(midEntry); } void -- GitLab From f5f485d888d541431ce40aa1e6bc3ae95bd5ae91 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 4 Jul 2018 14:16:16 -0300 Subject: [PATCH 0072/1291] cifs: Fix memory leak in smb2_set_ea() commit 6aa0c114eceec8cc61715f74a4ce91b048d7561c upstream. This patch fixes a memory leak when doing a setxattr(2) in SMB2+. Signed-off-by: Paulo Alcantara Cc: stable@vger.kernel.org Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 51d69473dbd9..83267ac3a3f0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -636,6 +636,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea, len); + kfree(ea); + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; -- GitLab From 28cada984c0e940d8419e6b3955d1f513f989e3a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Jul 2018 13:46:34 -0300 Subject: [PATCH 0073/1291] cifs: Fix infinite loop when using hard mount option commit 7ffbe65578b44fafdef577a360eb0583929f7c6e upstream. For every request we send, whether it is SMB1 or SMB2+, we attempt to reconnect tcon (cifs_reconnect_tcon or smb2_reconnect) before carrying out the request. So, while server->tcpStatus != CifsNeedReconnect, we wait for the reconnection to succeed on wait_event_interruptible_timeout(). If it returns, that means that either the condition was evaluated to true, or timeout elapsed, or it was interrupted by a signal. Since we're not handling the case where the process woke up due to a received signal (-ERESTARTSYS), the next call to wait_event_interruptible_timeout() will _always_ fail and we end up looping forever inside either cifs_reconnect_tcon() or smb2_reconnect(). Here's an example of how to trigger that: $ mount.cifs //foo/share /mnt/test -o username=foo,password=foo,vers=1.0,hard (break connection to server before executing bellow cmd) $ stat -f /mnt/test & sleep 140 [1] 2511 $ ps -aux -q 2511 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 2511 0.0 0.0 12892 1008 pts/0 S 12:24 0:00 stat -f /mnt/test $ kill -9 2511 (wait for a while; process is stuck in the kernel) $ ps -aux -q 2511 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 2511 83.2 0.0 12892 1008 pts/0 R 12:24 30:01 stat -f /mnt/test By using 'hard' mount point means that cifs.ko will keep retrying indefinitely, however we must allow the process to be killed otherwise it would hang the system. Signed-off-by: Paulo Alcantara Cc: stable@vger.kernel.org Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 10 ++++++++-- fs/cifs/smb2pdu.c | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7fd39ea6e22e..b5a436583469 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -150,8 +150,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * greater than cifs socket timeout which is 7 seconds */ while (server->tcpStatus == CifsNeedReconnect) { - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5247b40e57f6..71b81980787f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -153,7 +153,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd, static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { - int rc = 0; + int rc; struct nls_table *nls_codepage; struct cifs_ses *ses; struct TCP_Server_Info *server; @@ -164,10 +164,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) * for those three - in the calling routine. */ if (tcon == NULL) - return rc; + return 0; if (smb2_command == SMB2_TREE_CONNECT) - return rc; + return 0; if (tcon->tidStatus == CifsExiting) { /* @@ -210,8 +210,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) return -EAGAIN; } - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) @@ -229,7 +235,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } if (!tcon->ses->need_reconnect && !tcon->need_reconnect) - return rc; + return 0; nls_codepage = load_nls_default(); -- GitLab From 748144f35514aef14c4fdef5bcaa0db99cb9367a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 5 Jul 2018 11:46:42 +0200 Subject: [PATCH 0074/1291] cifs: Fix slab-out-of-bounds in send_set_info() on SMB2 ACE setting commit f46ecbd97f508e68a7806291a139499794874f3d upstream. A "small" CIFS buffer is not big enough in general to hold a setacl request for SMB2, and we end up overflowing the buffer in send_set_info(). For instance: # mount.cifs //127.0.0.1/test /mnt/test -o username=test,password=test,nounix,cifsacl # touch /mnt/test/acltest # getcifsacl /mnt/test/acltest REVISION:0x1 CONTROL:0x9004 OWNER:S-1-5-21-2926364953-924364008-418108241-1000 GROUP:S-1-22-2-1001 ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff ACL:S-1-22-2-1001:ALLOWED/0x0/R ACL:S-1-22-2-1001:ALLOWED/0x0/R ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff ACL:S-1-1-0:ALLOWED/0x0/R # setcifsacl -a "ACL:S-1-22-2-1004:ALLOWED/0x0/R" /mnt/test/acltest this setacl will cause the following KASAN splat: [ 330.777927] BUG: KASAN: slab-out-of-bounds in send_set_info+0x4dd/0xc20 [cifs] [ 330.779696] Write of size 696 at addr ffff88010d5e2860 by task setcifsacl/1012 [ 330.781882] CPU: 1 PID: 1012 Comm: setcifsacl Not tainted 4.18.0-rc2+ #2 [ 330.783140] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 330.784395] Call Trace: [ 330.784789] dump_stack+0xc2/0x16b [ 330.786777] print_address_description+0x6a/0x270 [ 330.787520] kasan_report+0x258/0x380 [ 330.788845] memcpy+0x34/0x50 [ 330.789369] send_set_info+0x4dd/0xc20 [cifs] [ 330.799511] SMB2_set_acl+0x76/0xa0 [cifs] [ 330.801395] set_smb2_acl+0x7ac/0xf30 [cifs] [ 330.830888] cifs_xattr_set+0x963/0xe40 [cifs] [ 330.840367] __vfs_setxattr+0x84/0xb0 [ 330.842060] __vfs_setxattr_noperm+0xe6/0x370 [ 330.843848] vfs_setxattr+0xc2/0xd0 [ 330.845519] setxattr+0x258/0x320 [ 330.859211] path_setxattr+0x15b/0x1b0 [ 330.864392] __x64_sys_setxattr+0xc0/0x160 [ 330.866133] do_syscall_64+0x14e/0x4b0 [ 330.876631] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 330.878503] RIP: 0033:0x7ff2e507db0a [ 330.880151] Code: 48 8b 0d 89 93 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 bc 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 56 93 2c 00 f7 d8 64 89 01 48 [ 330.885358] RSP: 002b:00007ffdc4903c18 EFLAGS: 00000246 ORIG_RAX: 00000000000000bc [ 330.887733] RAX: ffffffffffffffda RBX: 000055d1170de140 RCX: 00007ff2e507db0a [ 330.890067] RDX: 000055d1170de7d0 RSI: 000055d115b39184 RDI: 00007ffdc4904818 [ 330.892410] RBP: 0000000000000001 R08: 0000000000000000 R09: 000055d1170de7e4 [ 330.894785] R10: 00000000000002b8 R11: 0000000000000246 R12: 0000000000000007 [ 330.897148] R13: 000055d1170de0c0 R14: 0000000000000008 R15: 000055d1170de550 [ 330.901057] Allocated by task 1012: [ 330.902888] kasan_kmalloc+0xa0/0xd0 [ 330.904714] kmem_cache_alloc+0xc8/0x1d0 [ 330.906615] mempool_alloc+0x11e/0x380 [ 330.908496] cifs_small_buf_get+0x35/0x60 [cifs] [ 330.910510] smb2_plain_req_init+0x4a/0xd60 [cifs] [ 330.912551] send_set_info+0x198/0xc20 [cifs] [ 330.914535] SMB2_set_acl+0x76/0xa0 [cifs] [ 330.916465] set_smb2_acl+0x7ac/0xf30 [cifs] [ 330.918453] cifs_xattr_set+0x963/0xe40 [cifs] [ 330.920426] __vfs_setxattr+0x84/0xb0 [ 330.922284] __vfs_setxattr_noperm+0xe6/0x370 [ 330.924213] vfs_setxattr+0xc2/0xd0 [ 330.926008] setxattr+0x258/0x320 [ 330.927762] path_setxattr+0x15b/0x1b0 [ 330.929592] __x64_sys_setxattr+0xc0/0x160 [ 330.931459] do_syscall_64+0x14e/0x4b0 [ 330.933314] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 330.936843] Freed by task 0: [ 330.938588] (stack is not available) [ 330.941886] The buggy address belongs to the object at ffff88010d5e2800 which belongs to the cache cifs_small_rq of size 448 [ 330.946362] The buggy address is located 96 bytes inside of 448-byte region [ffff88010d5e2800, ffff88010d5e29c0) [ 330.950722] The buggy address belongs to the page: [ 330.952789] page:ffffea0004357880 count:1 mapcount:0 mapping:ffff880108fdca80 index:0x0 compound_mapcount: 0 [ 330.955665] flags: 0x17ffffc0008100(slab|head) [ 330.957760] raw: 0017ffffc0008100 dead000000000100 dead000000000200 ffff880108fdca80 [ 330.960356] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 330.963005] page dumped because: kasan: bad access detected [ 330.967039] Memory state around the buggy address: [ 330.969255] ffff88010d5e2880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 330.971833] ffff88010d5e2900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 330.974397] >ffff88010d5e2980: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 330.976956] ^ [ 330.979226] ffff88010d5e2a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 330.981755] ffff88010d5e2a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 330.984225] ================================================================== Fix this by allocating a regular CIFS buffer in smb2_plain_req_init() if the request command is SMB2_SET_INFO. Reported-by: Jianhong Yin Fixes: 366ed846df60 ("cifs: Use smb 2 - 3 and cifsacl mount options setacl function") CC: Stable Signed-off-by: Stefano Brivio Reviewed-and-tested-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 71b81980787f..0480cd9a9e81 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -338,7 +338,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; /* BB eventually switch this to SMB2 specific small buf size */ - *request_buf = cifs_small_buf_get(); + if (smb2_command == SMB2_SET_INFO) + *request_buf = cifs_buf_get(); + else + *request_buf = cifs_small_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -3168,7 +3171,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, } rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); + cifs_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; if (rc != 0) -- GitLab From 3cb81bce2191df8701cc85fd7d360df4321cf566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 29 Jun 2018 16:27:10 +0200 Subject: [PATCH 0075/1291] drm: Use kvzalloc for allocating blob property memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 718b5406cd76f1aa6434311241b7febf0e8571ff upstream. The property size may be controlled by userspace, can be large (I've seen failure with order 4, i.e. 16 pages / 64 KB) and doesn't need to be physically contiguous. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20180629142710.2069-1-michel@daenzer.net Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_property.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index bc5128203056..78e630771214 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -516,7 +516,7 @@ static void drm_property_free_blob(struct kref *kref) drm_mode_object_unregister(blob->dev, &blob->base); - kfree(blob); + kvfree(blob); } /** @@ -543,7 +543,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); - blob = kzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); + blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); if (!blob) return ERR_PTR(-ENOMEM); @@ -559,7 +559,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, ret = __drm_mode_object_add(dev, &blob->base, DRM_MODE_OBJECT_BLOB, true, drm_property_free_blob); if (ret) { - kfree(blob); + kvfree(blob); return ERR_PTR(-EINVAL); } -- GitLab From b541f470d4bda051356e8169d6b309bdd37437db Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:54 +0200 Subject: [PATCH 0076/1291] drm/udl: fix display corruption of the last line commit 99ec9e77511dea55d81729fc80b6c63a61bfa8e0 upstream. The displaylink hardware has such a peculiarity that it doesn't render a command until next command is received. This produces occasional corruption, such as when setting 22x11 font on the console, only the first line of the cursor will be blinking if the cursor is located at some specific columns. When we end up with a repeating pixel, the driver has a bug that it leaves one uninitialized byte after the command (and this byte is enough to flush the command and render it - thus it fixes the screen corruption), however whe we end up with a non-repeating pixel, there is no byte appended and this results in temporary screen corruption. This patch fixes the screen corruption by always appending a byte 0xAF at the end of URB. It also removes the uninitialized byte. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 5 ++++- drivers/gpu/drm/udl/udl_transfer.c | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 2ebdc6d5a76e..d5583190f3e4 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -137,7 +137,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, if (cmd > (char *) urb->transfer_buffer) { /* Send partial buffer remaining before exiting */ - int len = cmd - (char *) urb->transfer_buffer; + int len; + if (cmd < (char *) urb->transfer_buffer + urb->transfer_buffer_length) + *cmd++ = 0xAF; + len = cmd - (char *) urb->transfer_buffer; ret = udl_submit_urb(dev, urb, len); bytes_sent += len; } else diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 0c87b1ac6b68..b992644c17e6 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -153,11 +153,11 @@ static void udl_compress_hline16( raw_pixels_count_byte = cmd++; /* we'll know this later */ raw_pixel_start = pixel; - cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1, - min((int)(pixel_end - pixel) / bpp, - (int)(cmd_buffer_end - cmd) / 2))) * bpp; + cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, + (unsigned long)(pixel_end - pixel) / bpp, + (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; - prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); + prefetch_range((void *) pixel, cmd_pixel_end - pixel); pixel_val16 = get_pixel_val16(pixel, bpp); while (pixel < cmd_pixel_end) { @@ -193,6 +193,9 @@ static void udl_compress_hline16( if (pixel > raw_pixel_start) { /* finalize last RAW span */ *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; + } else { + /* undo unused byte */ + cmd--; } *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; -- GitLab From 0321e68838d7ba2528b367b879b2fcf9d96a2099 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 20:21:45 -0400 Subject: [PATCH 0077/1291] jbd2: don't mark block as modified if the handle is out of credits commit e09463f220ca9a1a1ecfda84fcda658f99a1f12a upstream. Do not set the b_modified flag in block's journal head should not until after we're sure that jbd2_journal_dirty_metadat() will not abort with an error due to there not being enough space reserved in the jbd2 handle. Otherwise, future attempts to modify the buffer may lead a large number of spurious errors and warnings. This addresses CVE-2018-10883. https://bugzilla.kernel.org/show_bug.cgi?id=200071 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 07793e25c976..e42736c1fdc8 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1366,6 +1366,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) { jbd_lock_bh_state(bh); + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) + pr_err("JBD2: assertion failure: h_type=%u " + "h_line_no=%u block_no=%llu jlist=%u\n", + handle->h_type, handle->h_line_no, + (unsigned long long) bh->b_blocknr, + jh->b_jlist); J_ASSERT_JH(jh, jh->b_transaction != transaction || jh->b_jlist == BJ_Metadata); jbd_unlock_bh_state(bh); @@ -1385,11 +1392,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * of the transaction. This needs to be done * once a transaction -bzzz */ - jh->b_modified = 1; if (handle->h_buffer_credits <= 0) { ret = -ENOSPC; goto out_unlock_bh; } + jh->b_modified = 1; handle->h_buffer_credits--; } -- GitLab From 0dc148230f3827f49b1e2a72d14cbad0c5c63e86 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:23:11 -0400 Subject: [PATCH 0078/1291] ext4: add corruption check in ext4_xattr_set_entry() commit 5369a762c882c0b6e9599e4ebbb3a9ba9eee7e2d upstream. In theory this should have been caught earlier when the xattr list was verified, but in case it got missed, it's simple enough to add check to make sure we don't overrun the xattr buffer. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index ed1cf24a7831..e5abf939989a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1559,7 +1559,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, handle_t *handle, struct inode *inode, bool is_block) { - struct ext4_xattr_entry *last; + struct ext4_xattr_entry *last, *next; struct ext4_xattr_entry *here = s->here; size_t min_offs = s->end - s->base, name_len = strlen(i->name); int in_inode = i->in_inode; @@ -1594,7 +1594,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Compute min_offs and last. */ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = next) { + next = EXT4_XATTR_NEXT(last); + if ((void *)next >= s->end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + ret = -EFSCORRUPTED; + goto out; + } if (!last->e_value_inum && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) -- GitLab From 3150e8913b957d71398511a0580606e181153d10 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:51:28 -0400 Subject: [PATCH 0079/1291] ext4: always verify the magic number in xattr blocks commit 513f86d73855ce556ea9522b6bfd79f87356dc3a upstream. If there an inode points to a block which is also some other type of metadata block (such as a block allocation bitmap), the buffer_verified flag can be set when it was validated as that other metadata block type; however, it would make a really terrible external attribute block. The reason why we use the verified flag is to avoid constantly reverifying the block. However, it doesn't take much overhead to make sure the magic number of the xattr block is correct, and this will avoid potential crashes. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e5abf939989a..871278eac8ba 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -229,12 +229,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, { int error = -EFSCORRUPTED; - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) goto errout; + if (buffer_verified(bh)) + return 0; + error = -EFSBADCRC; if (!ext4_xattr_block_csum_verify(inode, bh)) goto errout; -- GitLab From ac93c718365ac6ea9d7631641c8dec867d623491 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 23:08:26 -0400 Subject: [PATCH 0080/1291] ext4: make sure bitmaps and the inode table don't overlap with bg descriptors commit 77260807d1170a8cf35dbb06e07461a655f67eee upstream. It's really bad when the allocation bitmaps and the inode table overlap with the block group descriptors, since it causes random corruption of the bg descriptors. So we really want to head those off at the pass. https://bugzilla.kernel.org/show_bug.cgi?id=199865 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ec74d06fa24a..3559489a3a99 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2301,6 +2301,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1; ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -2333,6 +2334,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (block_bitmap >= sb_block + 1 && + block_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " @@ -2347,6 +2356,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_bitmap >= sb_block + 1 && + inode_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " @@ -2361,6 +2378,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_table >= sb_block + 1 && + inode_table <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " -- GitLab From ac48bb9bc0a32f5a4432be1645b57607f8c46aa7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 23:00:48 -0400 Subject: [PATCH 0081/1291] ext4: always check block group bounds in ext4_init_block_bitmap() commit 819b23f1c501b17b9694325471789e6b5cc2d0d2 upstream. Regardless of whether the flex_bg feature is set, we should always check to make sure the bits we are setting in the block bitmap are within the block group bounds. https://bugzilla.kernel.org/show_bug.cgi?id=199865 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 58db8109defa..3149aee77b6b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - int flex_bg = 0; struct ext4_group_info *grp; J_ASSERT_BH(bh, buffer_locked(bh)); @@ -217,22 +216,19 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (ext4_has_feature_flex_bg(sb)) - flex_bg = 1; - /* Set bits for block and inode bitmaps, and inode table */ tmp = ext4_block_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_table(sb, gdp); for (; tmp < ext4_inode_table(sb, gdp) + sbi->s_itb_per_group; tmp++) { - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); } -- GitLab From 44a4bc970bfae625d0ec9ecdfefc88c9d93dfe6c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jun 2018 00:58:00 -0400 Subject: [PATCH 0082/1291] ext4: only look at the bg_flags field if it is valid commit 8844618d8aa7a9973e7b527d038a2a589665002c upstream. The bg_flags field in the block group descripts is only valid if the uninit_bg or metadata_csum feature is enabled. We were not consistently looking at this field; fix this. Also block group #0 must never have uninitialized allocation bitmaps, or need to be zeroed, since that's where the root inode, and other special inodes are set up. Check for these conditions and mark the file system as corrupted if they are detected. This addresses CVE-2018-10876. https://bugzilla.kernel.org/show_bug.cgi?id=199403 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 11 ++++++++++- fs/ext4/ialloc.c | 14 ++++++++++++-- fs/ext4/mballoc.c | 6 ++++-- fs/ext4/super.c | 11 ++++++++++- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3149aee77b6b..9c9eafd6bd76 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -451,7 +451,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) goto verify; } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Block bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f420124ac035..95341bc2b3b7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -155,7 +155,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Inode bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); @@ -1000,7 +1009,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 701085620cd8..048c586d9a8b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2456,7 +2456,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, * initialize bb_free to be able to skip * empty groups without initialization */ - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { meta_group_info[i]->bb_free = ext4_free_clusters_after_init(sb, group, desc); } else { @@ -3023,7 +3024,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, #endif ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3559489a3a99..fefcfa9fe408 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3095,13 +3095,22 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL; + if (!ext4_has_group_desc_csum(sb)) + return ngroups; + for (group = 0; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) continue; - if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) + continue; + if (group != 0) break; + ext4_error(sb, "Inode table for bg 0 marked as " + "needing zeroing"); + if (sb_rdonly(sb)) + return ngroups; } return group; -- GitLab From d69a9df614fc68741efcb0fcc020f05caa99d668 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jun 2018 12:55:10 -0400 Subject: [PATCH 0083/1291] ext4: verify the depth of extent tree in ext4_find_extent() commit bc890a60247171294acc0bd67d211fa4b88d40ba upstream. If there is a corupted file system where the claimed depth of the extent tree is -1, this can cause a massive buffer overrun leading to sadness. This addresses CVE-2018-10877. https://bugzilla.kernel.org/show_bug.cgi?id=199417 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_extents.h | 1 + fs/ext4/extents.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 8ecf84b8f5a1..a284fb28944b 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -103,6 +103,7 @@ struct ext4_extent_header { }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) +#define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 883e89a903d1..5592b7726241 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -881,6 +881,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_inode_hdr(inode); depth = ext_depth(inode); + if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) { + EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d", + depth); + ret = -EFSCORRUPTED; + goto err; + } if (path) { ext4_ext_drop_refs(path); -- GitLab From 64804502d0e957bc594de35872381a808bb986cb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 Jun 2018 12:27:16 -0400 Subject: [PATCH 0084/1291] ext4: include the illegal physical block in the bad map ext4_error msg commit bdbd6ce01a70f02e9373a584d0ae9538dcf0a121 upstream. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bd6453e78992..5b28153eb0fd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -401,9 +401,9 @@ static int __check_block_validity(struct inode *inode, const char *func, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, - "lblock %lu mapped to illegal pblock " + "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, - map->m_len); + map->m_pblk, map->m_len); return -EFSCORRUPTED; } return 0; -- GitLab From deb465ec750b80776cc4ac5b92b72c0a71fd4f0b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 Jun 2018 12:28:16 -0400 Subject: [PATCH 0085/1291] ext4: clear i_data in ext4_inode_info when removing inline data commit 6e8ab72a812396996035a37e5ca4b3b99b5d214b upstream. When converting from an inode from storing the data in-line to a data block, ext4_destroy_inline_data_nolock() was only clearing the on-disk copy of the i_blocks[] array. It was not clearing copy of the i_blocks[] in ext4_inode_info, in i_data[], which is the copy actually used by ext4_map_blocks(). This didn't matter much if we are using extents, since the extents header would be invalid and thus the extents could would re-initialize the extents tree. But if we are using indirect blocks, the previous contents of the i_blocks array will be treated as block numbers, with potentially catastrophic results to the file system integrity and/or user data. This gets worse if the file system is using a 1k block size and s_first_data is zero, but even without this, the file system can get quite badly corrupted. This addresses CVE-2018-10881. https://bugzilla.kernel.org/show_bug.cgi?id=200015 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 8f5dc243effd..afdef31ff728 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -443,6 +443,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); + memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || -- GitLab From 8a9ef17c0dc93def47e17b227ada95c682592a1d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 15:40:48 -0400 Subject: [PATCH 0086/1291] ext4: never move the system.data xattr out of the inode body commit 8cdb5240ec5928b20490a2bb34cb87e9a5f40226 upstream. When expanding the extra isize space, we must never move the system.data xattr out of the inode body. For performance reasons, it doesn't make any sense, and the inline data implementation assumes that system.data xattr is never in the external xattr block. This addresses CVE-2018-10880 https://bugzilla.kernel.org/show_bug.cgi?id=200005 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 871278eac8ba..e2fa3adb4b7b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2656,6 +2656,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, last = IFIRST(header); /* Find the entry best suited to be pushed into EA block */ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + /* never move system.data out of the inode */ + if ((last->e_name_len == 4) && + (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && + !memcmp(last->e_name, "data", 4)) + continue; total_size = EXT4_XATTR_LEN(last->e_name_len); if (!last->e_value_inum) total_size += EXT4_XATTR_SIZE( -- GitLab From 02945e49dc2026459e069467ec64e3fbecda844f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 23:41:59 -0400 Subject: [PATCH 0087/1291] ext4: avoid running out of journal credits when appending to an inline file commit 8bc1379b82b8e809eef77a9fedbb75c6c297be19 upstream. Use a separate journal transaction if it turns out that we need to convert an inline file to use an data block. Otherwise we could end up failing due to not having journal credits. This addresses CVE-2018-10883. https://bugzilla.kernel.org/show_bug.cgi?id=200071 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 --- fs/ext4/inline.c | 38 +------------------------------------- fs/ext4/xattr.c | 19 ++----------------- 3 files changed, 3 insertions(+), 57 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 58a0304566db..db389611f8bc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3049,9 +3049,6 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, extern int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index afdef31ff728..7d498f4a3f90 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -893,11 +893,11 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, flags |= AOP_FLAG_NOFS; if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; @@ -1865,42 +1865,6 @@ int ext4_inline_data_fiemap(struct inode *inode, return (error < 0 ? error : 0); } -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e2fa3adb4b7b..c7c8c16ccd93 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2211,23 +2211,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, handle, inode, - false /* is_block */); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); -- GitLab From c24aab6d86640ccf321b87be6096319f55b16274 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 17 Jun 2018 00:41:14 -0400 Subject: [PATCH 0088/1291] ext4: add more inode number paranoia checks commit c37e9e013469521d9adb932d17a1795c139b36db upstream. If there is a directory entry pointing to a system inode (such as a journal inode), complain and declare the file system to be corrupted. Also, if the superblock's first inode number field is too small, refuse to mount the file system. This addresses CVE-2018-10882. https://bugzilla.kernel.org/show_bug.cgi?id=200069 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 5 ----- fs/ext4/inode.c | 3 ++- fs/ext4/super.c | 5 +++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index db389611f8bc..0abb30d19fa1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1542,11 +1542,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || - ino == EXT4_USR_QUOTA_INO || - ino == EXT4_GRP_QUOTA_INO || - ino == EXT4_BOOT_LOADER_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5b28153eb0fd..c2efe4d2ad87 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4455,7 +4455,8 @@ static int __ext4_get_inode_loc(struct inode *inode, int inodes_per_block, inode_offset; iloc->bh = NULL; - if (!ext4_valid_inum(sb, inode->i_ino)) + if (inode->i_ino < EXT4_ROOT_INO || + inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fefcfa9fe408..6933efbb582f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3811,6 +3811,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { -- GitLab From 54bf664ae4fa0b27d8fefc64514509e63dd89929 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 17 Jun 2018 18:11:20 -0400 Subject: [PATCH 0089/1291] ext4: add more mount time checks of the superblock commit bfe0a5f47ada40d7984de67e59a7d3390b9b9ecc upstream. The kernel's ext4 mount-time checks were more permissive than e2fsprogs's libext2fs checks when opening a file system. The superblock is considered too insane for debugfs or e2fsck to operate on it, the kernel has no business trying to mount it. This will make file system fuzzing tools work harder, but the failure cases that they find will be more useful and be easier to evaluate. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6933efbb582f..cc065ae90d4f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3749,6 +3749,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, @@ -3892,13 +3899,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = @@ -3919,10 +3919,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } else { if (clustersize != blocksize) { - ext4_warning(sb, "fragment/cluster size (%d) != " - "block size (%d)", clustersize, - blocksize); - clustersize = blocksize; + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%d)", clustersize, blocksize); + goto failed_mount; } if (sbi->s_blocks_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, @@ -3976,6 +3976,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_blocks_count(es)); goto failed_mount; } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + goto failed_mount; + } + blocks_count = (ext4_blocks_count(es) - le32_to_cpu(es->s_first_data_block) + EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -4011,6 +4018,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } bgl_lock_init(sbi->s_blockgroup_lock); -- GitLab From fba3230595cb7c27ba27a4e100cdf9e4b4b273b9 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 2 Jul 2018 18:45:18 -0400 Subject: [PATCH 0090/1291] ext4: check superblock mapped prior to committing commit a17712c8e4be4fa5404d20e9cd3b2b21eae7bc56 upstream. This patch attempts to close a hole leading to a BUG seen with hot removals during writes [1]. A block device (NVME namespace in this test case) is formatted to EXT4 without partitions. It's mounted and write I/O is run to a file, then the device is hot removed from the slot. The superblock attempts to be written to the drive which is no longer present. The typical chain of events leading to the BUG: ext4_commit_super() __sync_dirty_buffer() submit_bh() submit_bh_wbc() BUG_ON(!buffer_mapped(bh)); This fix checks for the superblock's buffer head being mapped prior to syncing. [1] https://www.spinics.net/lists/linux-ext4/msg56527.html Signed-off-by: Jon Derrick Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cc065ae90d4f..83ba37be4702 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4754,6 +4754,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; + + /* + * The superblock bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(sbh)) + return error; + /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock -- GitLab From 3e3f1310c6065f38538ee5b64a2610212110e148 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Oct 2017 15:59:09 +0200 Subject: [PATCH 0091/1291] block: factor out __blkdev_issue_zero_pages() commit 425a4dba7953e35ffd096771973add6d2f40d2ed upstream. blkdev_issue_zeroout() will use this in !BLKDEV_ZERO_NOFALLBACK case. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe Cc: Janne Huttunen Signed-off-by: Greg Kroah-Hartman --- block/blk-lib.c | 63 +++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 63fb971d6574..9e86a4871b0f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -275,6 +275,40 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) return min(pages, (sector_t)BIO_MAX_PAGES); } +static int __blkdev_issue_zero_pages(struct block_device *bdev, + sector_t sector, sector_t nr_sects, gfp_t gfp_mask, + struct bio **biop) +{ + struct request_queue *q = bdev_get_queue(bdev); + struct bio *bio = *biop; + int bi_size = 0; + unsigned int sz; + + if (!q) + return -ENXIO; + + while (nr_sects != 0) { + bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), + gfp_mask); + bio->bi_iter.bi_sector = sector; + bio_set_dev(bio, bdev); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + while (nr_sects != 0) { + sz = min((sector_t) PAGE_SIZE, nr_sects << 9); + bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0); + nr_sects -= bi_size >> 9; + sector += bi_size >> 9; + if (bi_size < sz) + break; + } + cond_resched(); + } + + *biop = bio; + return 0; +} + /** * __blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue @@ -305,9 +339,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, unsigned flags) { int ret; - int bi_size = 0; - struct bio *bio = *biop; - unsigned int sz; sector_t bs_mask; bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; @@ -317,30 +348,10 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, biop, flags); if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK)) - goto out; - - ret = 0; - while (nr_sects != 0) { - bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), - gfp_mask); - bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - - while (nr_sects != 0) { - sz = min((sector_t) PAGE_SIZE, nr_sects << 9); - bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0); - nr_sects -= bi_size >> 9; - sector += bi_size >> 9; - if (bi_size < sz) - break; - } - cond_resched(); - } + return ret; - *biop = bio; -out: - return ret; + return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, + biop); } EXPORT_SYMBOL(__blkdev_issue_zeroout); -- GitLab From c894755d1bc8beaff5dec0dc493178209118630f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Oct 2017 15:59:10 +0200 Subject: [PATCH 0092/1291] block: cope with WRITE ZEROES failing in blkdev_issue_zeroout() commit d5ce4c31d6df518dd8f63bbae20d7423c5018a6c upstream. sd_config_write_same() ignores ->max_ws_blocks == 0 and resets it to permit trying WRITE SAME on older SCSI devices, unless ->no_write_same is set. Because REQ_OP_WRITE_ZEROES is implemented in terms of WRITE SAME, blkdev_issue_zeroout() may fail with -EREMOTEIO: $ fallocate -zn -l 1k /dev/sdg fallocate: fallocate failed: Remote I/O error $ fallocate -zn -l 1k /dev/sdg # OK $ fallocate -zn -l 1k /dev/sdg # OK The following calls succeed because sd_done() sets ->no_write_same in response to a sense that would become BLK_STS_TARGET/-EREMOTEIO, causing __blkdev_issue_zeroout() to fall back to generating ZERO_PAGE bios. This means blkdev_issue_zeroout() must cope with WRITE ZEROES failing and fall back to manually zeroing, unless BLKDEV_ZERO_NOFALLBACK is specified. For BLKDEV_ZERO_NOFALLBACK case, return -EOPNOTSUPP if sd_done() has just set ->no_write_same thus indicating lack of offload support. Fixes: c20cfc27a473 ("block: stop using blkdev_issue_write_same for zeroing") Cc: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe Cc: Janne Huttunen Signed-off-by: Greg Kroah-Hartman --- block/blk-lib.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 9e86a4871b0f..2bc544ce3d2e 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -322,12 +322,6 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev, * Zero-fill a block range, either using hardware offload or by explicitly * writing zeroes to the device. * - * Note that this function may fail with -EOPNOTSUPP if the driver signals - * zeroing offload support, but the device fails to process the command (for - * some devices there is no non-destructive way to verify whether this - * operation is actually supported). In this case the caller should call - * retry the call to blkdev_issue_zeroout() and the fallback path will be used. - * * If a device is using logical block provisioning, the underlying space will * not be released if %flags contains BLKDEV_ZERO_NOUNMAP. * @@ -371,18 +365,49 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout); int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned flags) { - int ret; - struct bio *bio = NULL; + int ret = 0; + sector_t bs_mask; + struct bio *bio; struct blk_plug plug; + bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev); + bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; + if ((sector | nr_sects) & bs_mask) + return -EINVAL; + +retry: + bio = NULL; blk_start_plug(&plug); - ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, - &bio, flags); + if (try_write_zeroes) { + ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, + gfp_mask, &bio, flags); + } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) { + ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects, + gfp_mask, &bio); + } else { + /* No zeroing offload support */ + ret = -EOPNOTSUPP; + } if (ret == 0 && bio) { ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); + if (ret && try_write_zeroes) { + if (!(flags & BLKDEV_ZERO_NOFALLBACK)) { + try_write_zeroes = false; + goto retry; + } + if (!bdev_write_zeroes_sectors(bdev)) { + /* + * Zeroing offload support was indicated, but the + * device reported ILLEGAL REQUEST (for some devices + * there is no non-destructive way to verify whether + * WRITE ZEROES is actually supported). + */ + ret = -EOPNOTSUPP; + } + } return ret; } -- GitLab From 702027291bf5b6a0d70eca15adff0a68bfcd8430 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 22 Jun 2018 12:25:49 -0400 Subject: [PATCH 0093/1291] HID: i2c-hid: Fix "incomplete report" noise commit ef6eaf27274c0351f7059163918f3795da13199c upstream. Commit ac75a041048b ("HID: i2c-hid: fix size check and type usage") started writing messages when the ret_size is <= 2 from i2c_master_recv. However, my device i2c-DLL07D1 returns 2 for a short period of time (~0.5s) after I stop moving the pointing stick or touchpad. It varies, but you get ~50 messages each time which spams the log hard. [ 95.925055] i2c_hid i2c-DLL07D1:01: i2c_hid_get_input: incomplete report (83/2) This has also been observed with a i2c-ALP0017. [ 1781.266353] i2c_hid i2c-ALP0017:00: i2c_hid_get_input: incomplete report (30/2) Only print the message when ret_size is totally invalid and less than 2 to cut down on the log spam. Fixes: ac75a041048b ("HID: i2c-hid: fix size check and type usage") Reported-by: John Smith Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 3535073a9a7d..d92827556389 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -476,7 +476,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) return; } - if ((ret_size > size) || (ret_size <= 2)) { + if ((ret_size > size) || (ret_size < 2)) { dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", __func__, size, ret_size); return; -- GitLab From c1d21fe74c2554f5031f6ceb354ee3ce2ccf475f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 29 Jun 2018 17:08:44 -0500 Subject: [PATCH 0094/1291] HID: hiddev: fix potential Spectre v1 commit 4f65245f2d178b9cba48350620d76faa4a098841 upstream. uref->field_index, uref->usage_index, finfo.field_index and cinfo.index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/hid/usbhid/hiddev.c:473 hiddev_ioctl_usage() warn: potential spectre issue 'report->field' (local cap) drivers/hid/usbhid/hiddev.c:477 hiddev_ioctl_usage() warn: potential spectre issue 'field->usage' (local cap) drivers/hid/usbhid/hiddev.c:757 hiddev_ioctl() warn: potential spectre issue 'report->field' (local cap) drivers/hid/usbhid/hiddev.c:801 hiddev_ioctl() warn: potential spectre issue 'hid->collection' (local cap) Fix this by sanitizing such structure fields before using them to index report->field, field->usage and hid->collection Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 7d749b19c27c..cf307bdc3d53 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "usbhid.h" #ifdef CONFIG_USB_DYNAMIC_MINORS @@ -469,10 +470,14 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (uref->field_index >= report->maxfield) goto inval; + uref->field_index = array_index_nospec(uref->field_index, + report->maxfield); field = report->field[uref->field_index]; if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = array_index_nospec(uref->usage_index, + field->maxusage); uref->usage_code = field->usage[uref->usage_index].hid; @@ -499,6 +504,8 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (uref->field_index >= report->maxfield) goto inval; + uref->field_index = array_index_nospec(uref->field_index, + report->maxfield); field = report->field[uref->field_index]; @@ -753,6 +760,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (finfo.field_index >= report->maxfield) break; + finfo.field_index = array_index_nospec(finfo.field_index, + report->maxfield); field = report->field[finfo.field_index]; memset(&finfo, 0, sizeof(finfo)); @@ -797,6 +806,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (cinfo.index >= hid->maxcollection) break; + cinfo.index = array_index_nospec(cinfo.index, + hid->maxcollection); cinfo.type = hid->collection[cinfo.index].type; cinfo.usage = hid->collection[cinfo.index].usage; -- GitLab From 50b4d984f55e7e8d75f75da6803505ca3c122cef Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 2 Jul 2018 16:59:37 -0700 Subject: [PATCH 0095/1291] HID: debug: check length before copy_to_user() commit 717adfdaf14704fd3ec7fa2c04520c0723247eac upstream. If our length is greater than the size of the buffer, we overflow the buffer Cc: stable@vger.kernel.org Signed-off-by: Daniel Rosenberg Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-debug.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 5271db593478..ae8c8e66a6c4 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1154,6 +1154,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, goto out; if (list->tail > list->head) { len = list->tail - list->head; + if (len > count) + len = count; if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { ret = -EFAULT; @@ -1163,6 +1165,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, list->head += len; } else { len = HID_DEBUG_BUFSIZE - list->head; + if (len > count) + len = count; if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { ret = -EFAULT; @@ -1170,7 +1174,9 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, } list->head = 0; ret += len; - goto copy_rest; + count -= len; + if (count > 0) + goto copy_rest; } } -- GitLab From 07a1c2d1131b2675776480118d0855845cfffaef Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Wed, 1 Nov 2017 10:14:51 +0600 Subject: [PATCH 0096/1291] irq/core: Fix boot crash when the irqaffinity= boot parameter is passed on CPUMASK_OFFSTACK=y kernels(v1) commit 10d94ff4d558b96bfc4f55bb0051ae4d938246fe upstream. When the irqaffinity= kernel parameter is passed in a CPUMASK_OFFSTACK=y kernel, it fails to boot, because zalloc_cpumask_var() cannot be used before initializing the slab allocator to allocate a cpumask. So, use alloc_bootmem_cpumask_var() instead. Also do some cleanups while at it: in init_irq_default_affinity() remove an #ifdef via using cpumask_available(). Signed-off-by: Rakib Mullick Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171026045800.27087-1-rakib.mullick@gmail.com Link: http://lkml.kernel.org/r/20171101041451.12581-1-rakib.mullick@gmail.com Signed-off-by: Ingo Molnar Cc: Janne Huttunen Signed-off-by: Greg Kroah-Hartman --- kernel/irq/irqdesc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 82afb7ed369f..e97bbae947f0 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -27,7 +27,7 @@ static struct lock_class_key irq_desc_lock_class; #if defined(CONFIG_SMP) static int __init irq_affinity_setup(char *str) { - zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); + alloc_bootmem_cpumask_var(&irq_default_affinity); cpulist_parse(str, irq_default_affinity); /* * Set at least the boot cpu. We don't want to end up with @@ -40,10 +40,8 @@ __setup("irqaffinity=", irq_affinity_setup); static void __init init_irq_default_affinity(void) { -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!irq_default_affinity) + if (!cpumask_available(irq_default_affinity)) zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); -#endif if (cpumask_empty(irq_default_affinity)) cpumask_setall(irq_default_affinity); } -- GitLab From b16a6af97461f6d296422887502331b33c729b48 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 5 Apr 2018 16:23:05 -0700 Subject: [PATCH 0097/1291] mm: hwpoison: disable memory error handling on 1GB hugepage commit 31286a8484a85e8b4e91ddb0f5415aee8a416827 upstream. Recently the following BUG was reported: Injecting memory failure for pfn 0x3c0000 at process virtual address 0x7fe300000000 Memory failure: 0x3c0000: recovery action for huge page: Recovered BUG: unable to handle kernel paging request at ffff8dfcc0003000 IP: gup_pgd_range+0x1f0/0xc20 PGD 17ae72067 P4D 17ae72067 PUD 0 Oops: 0000 [#1] SMP PTI ... CPU: 3 PID: 5467 Comm: hugetlb_1gb Not tainted 4.15.0-rc8-mm1-abc+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 You can easily reproduce this by calling madvise(MADV_HWPOISON) twice on a 1GB hugepage. This happens because get_user_pages_fast() is not aware of a migration entry on pud that was created in the 1st madvise() event. I think that conversion to pud-aligned migration entry is working, but other MM code walking over page table isn't prepared for it. We need some time and effort to make all this work properly, so this patch avoids the reported bug by just disabling error handling for 1GB hugepage. [n-horiguchi@ah.jp.nec.com: v2] Link: http://lkml.kernel.org/r/1517284444-18149-1-git-send-email-n-horiguchi@ah.jp.nec.com Link: http://lkml.kernel.org/r/1517207283-15769-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Acked-by: Michal Hocko Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Punit Agrawal Tested-by: Michael Ellerman Cc: Anshuman Khandual Cc: "Aneesh Kumar K.V" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 1 + mm/memory-failure.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index f23215854c80..a26cf767407e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2549,6 +2549,7 @@ enum mf_action_page_type { MF_MSG_POISONED_HUGE, MF_MSG_HUGE, MF_MSG_FREE_HUGE, + MF_MSG_NON_PMD_HUGE, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 1cd3b3569af8..345e69d88b37 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -508,6 +508,7 @@ static const char * const action_page_types[] = { [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned", [MF_MSG_HUGE] = "huge page", [MF_MSG_FREE_HUGE] = "free huge page", + [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page", [MF_MSG_UNMAP_FAILED] = "unmapping failed page", [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page", @@ -1090,6 +1091,21 @@ static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) return 0; } + /* + * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so + * simply disable it. In order to make it work properly, we need + * make sure that: + * - conversion of a pud that maps an error hugetlb into hwpoison + * entry properly works, and + * - other mm code walking over page table is aware of pud-aligned + * hwpoison entries. + */ + if (huge_page_size(page_hstate(head)) > PMD_SIZE) { + action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED); + res = -EBUSY; + goto out; + } + if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; -- GitLab From a2c7493c7f31c0e4d1dede9a179f853484f7dd1d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 2 Feb 2018 05:08:59 -0500 Subject: [PATCH 0098/1291] media: vb2: core: Finish buffers at the end of the stream commit 03703ed1debf777ea845aa9b50ba2e80a5e7dd3c upstream. If buffers were prepared or queued and the buffers were released without starting the queue, the finish mem op (corresponding to the prepare mem op) was never called to the buffers. Before commit a136f59c0a1f there was no need to do this as in such a case the prepare mem op had not been called yet. Address the problem by explicitly calling finish mem op when the queue is stopped if the buffer is in either prepared or queued state. Fixes: a136f59c0a1f ("[media] vb2: Move buffer cache synchronisation to prepare from queue") Cc: stable@vger.kernel.org # for v4.13 and up Signed-off-by: Sakari Ailus Tested-by: Devin Heitmueller Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 6d9adcaa26ba..ffbb178c6918 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1689,6 +1689,15 @@ static void __vb2_queue_cancel(struct vb2_queue *q) for (i = 0; i < q->num_buffers; ++i) { struct vb2_buffer *vb = q->bufs[i]; + if (vb->state == VB2_BUF_STATE_PREPARED || + vb->state == VB2_BUF_STATE_QUEUED) { + unsigned int plane; + + for (plane = 0; plane < vb->num_planes; ++plane) + call_void_memop(vb, finish, + vb->planes[plane].mem_priv); + } + if (vb->state != VB2_BUF_STATE_DEQUEUED) { vb->state = VB2_BUF_STATE_PREPARED; call_void_vb_qop(vb, buf_finish, vb); -- GitLab From 42dc2a7bb72ed376ccfe8b8f49187a00f893c53f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Mar 2018 17:58:13 -0700 Subject: [PATCH 0099/1291] f2fs: truncate preallocated blocks in error case commit dc7a10ddee0c56c6d891dd18de5c4ee9869545e0 upstream. If write is failed, we must deallocate the blocks that we couldn't write. Cc: stable@vger.kernel.org Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 29c5f799890c..72c6a9e9a9b4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2694,11 +2694,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + err = f2fs_preallocate_blocks(iocb, from); if (err) { clear_inode_flag(inode, FI_NO_PREALLOC); @@ -2710,6 +2715,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } -- GitLab From 3bb6397ba643d20a6bc0f9a729a6e6e7c83e420a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Jul 2018 16:01:30 +0200 Subject: [PATCH 0100/1291] Revert "dpaa_eth: fix error in dpaa_remove()" This reverts commit 5bbb99d2fde047df596379be6c58e265e2ddbe1f which is commit 88075256ee817041d68c2387f29065b5cb2b342a upstream. Jiri writes that this was an incorrect fix, and Madalin-cristian says it was fixed differently in a later patch. So just revert this from 4.14.y. Reported-by: Jiri Slaby Cc: Madalin Bucur Cc: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 5b4f05805006..519a021c0a25 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2863,7 +2863,7 @@ static int dpaa_remove(struct platform_device *pdev) struct device *dev; int err; - dev = pdev->dev.parent; + dev = &pdev->dev; net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev); -- GitLab From e82885490a611f2b75a6c27cd7bb09665c1740be Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sun, 8 Apr 2018 23:35:28 +0200 Subject: [PATCH 0101/1291] Kbuild: fix # escaping in .cmd files for future Make MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9564a8cf422d7b58f6e857e3546d346fa970191e upstream. I tried building using a freshly built Make (4.2.1-69-g8a731d1), but already the objtool build broke with orc_dump.c: In function ‘orc_dump’: orc_dump.c:106:2: error: ‘elf_getshnum’ is deprecated [-Werror=deprecated-declarations] if (elf_getshdrnum(elf, &nr_sections)) { Turns out that with that new Make, the backslash was not removed, so cpp didn't see a #include directive, grep found nothing, and -DLIBELF_USE_DEPRECATED was wrongly put in CFLAGS. Now, that new Make behaviour is documented in their NEWS file: * WARNING: Backward-incompatibility! Number signs (#) appearing inside a macro reference or function invocation no longer introduce comments and should not be escaped with backslashes: thus a call such as: foo := $(shell echo '#') is legal. Previously the number sign needed to be escaped, for example: foo := $(shell echo '\#') Now this latter will resolve to "\#". If you want to write makefiles portable to both versions, assign the number sign to a variable: C := \# foo := $(shell echo '$C') This was claimed to be fixed in 3.81, but wasn't, for some reason. To detect this change search for 'nocomment' in the .FEATURES variable. This also fixes up the two make-cmd instances to replace # with $(pound) rather than with \#. There might very well be other places that need similar fixup in preparation for whatever future Make release contains the above change, but at least this builds an x86_64 defconfig with the new make. Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847 Cc: Randy Dunlap Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/Kbuild.include | 5 +++-- tools/build/Build.include | 5 +++-- tools/objtool/Makefile | 2 +- tools/scripts/Makefile.include | 2 ++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 97769465de13..fcbbecf92395 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -8,6 +8,7 @@ squote := ' empty := space := $(empty) $(empty) space_escape := _-_SPACE_-_ +pound := \# ### # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o @@ -251,11 +252,11 @@ endif # Replace >$< with >$$< to preserve $ when reloading the .cmd file # (needed for make) -# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file # (needed for make) # Replace >'< with >'\''< to be able to enclose the whole string in '...' # (needed for the shell) -make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) +make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))) # Find any prerequisites that is newer than target or that does not exist. # PHONY targets skipped in both cases. diff --git a/tools/build/Build.include b/tools/build/Build.include index 418871d02ebf..a4bbb984941d 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -12,6 +12,7 @@ # Convenient variables comma := , squote := ' +pound := \# ### # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o @@ -43,11 +44,11 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\ ### # Replace >$< with >$$< to preserve $ when reloading the .cmd file # (needed for make) -# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file # (needed for make) # Replace >'< with >'\''< to be able to enclose the whole string in '...' # (needed for the shell) -make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) +make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))) ### # Find any prerequisites that is newer than target or that does not exist. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index e6acc281dd37..8ae824dbfca3 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -35,7 +35,7 @@ CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) LDFLAGS += -lelf $(LIBSUBCMD) # Allow old libelf to be used: -elfshdr := $(shell echo '\#include ' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) +elfshdr := $(shell echo '$(pound)include ' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED) AWK = awk diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 654efd9768fd..5f3f1f44ed0a 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -101,3 +101,5 @@ ifneq ($(silent),1) QUIET_INSTALL = @printf ' INSTALL %s\n' $1; endif endif + +pound := \# -- GitLab From c4097c64d03afaf1212a76729b6758f662e901b1 Mon Sep 17 00:00:00 2001 From: Brad Love Date: Tue, 6 Mar 2018 14:15:34 -0500 Subject: [PATCH 0102/1291] media: cx25840: Use subdev host data for PLL override commit 3ee9bc12342cf546313d300808ff47d7dbb8e7db upstream. The cx25840 driver currently configures 885, 887, and 888 using default divisors for each chip. This check to see if the cx23885 driver has passed the cx25840 a non-default clock rate for a specific chip. If a cx23885 board has left clk_freq at 0, the clock default values will be used to configure the PLLs. This patch only has effect on 888 boards who set clk_freq to 25M. Signed-off-by: Brad Love Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/cx25840/cx25840-core.c | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c index 39f51daa7558..c5642813eff1 100644 --- a/drivers/media/i2c/cx25840/cx25840-core.c +++ b/drivers/media/i2c/cx25840/cx25840-core.c @@ -463,8 +463,13 @@ static void cx23885_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); + u32 clk_freq = 0; struct workqueue_struct *q; + /* cx23885 sets hostdata to clk_freq pointer */ + if (v4l2_get_subdev_hostdata(&state->sd)) + clk_freq = *((u32 *)v4l2_get_subdev_hostdata(&state->sd)); + /* * Come out of digital power down * The CX23888, at least, needs this, otherwise registers aside from @@ -500,8 +505,13 @@ static void cx23885_initialize(struct i2c_client *client) * 50.0 MHz * (0xb + 0xe8ba26/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ - /* HVR1850 or 50MHz xtal */ - cx25840_write(client, 0x2, 0x71); + if (clk_freq == 25000000) { + /* 888/ImpactVCBe or 25Mhz xtal */ + ; /* nothing to do */ + } else { + /* HVR1850 or 50MHz xtal */ + cx25840_write(client, 0x2, 0x71); + } cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); cx25840_write4(client, 0x404, 0x0010253e); @@ -544,9 +554,15 @@ static void cx23885_initialize(struct i2c_client *client) /* HVR1850 */ switch (state->id) { case CX23888_AV: - /* 888/HVR1250 specific */ - cx25840_write4(client, 0x10c, 0x13333333); - cx25840_write4(client, 0x108, 0x00000515); + if (clk_freq == 25000000) { + /* 888/ImpactVCBe or 25MHz xtal */ + cx25840_write4(client, 0x10c, 0x01b6db7b); + cx25840_write4(client, 0x108, 0x00000512); + } else { + /* 888/HVR1250 or 50MHz xtal */ + cx25840_write4(client, 0x10c, 0x13333333); + cx25840_write4(client, 0x108, 0x00000515); + } break; default: cx25840_write4(client, 0x10c, 0x002be2c9); @@ -576,7 +592,7 @@ static void cx23885_initialize(struct i2c_client *client) * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ - /* HVR1850 or 50MHz xtal */ + /* HVR1850 or 50MHz xtal or 25MHz xtal */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; -- GitLab From 5941026fc7a540b9d60824a9fb77ccdbe2238c0c Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 18 Jun 2018 22:41:03 +0200 Subject: [PATCH 0103/1291] mtd: rawnand: mxc: set spare area size register explicitly commit 3f77f244d8ec28e3a0a81240ffac7d626390060c upstream. The v21 version of the NAND flash controller contains a Spare Area Size Register (SPAS) at offset 0x10. Its setting defaults to the maximum spare area size of 218 bytes. The size that is set in this register is used by the controller when it calculates the ECC bytes internally in hardware. Usually, this register is updated from settings in the IIM fuses when the system is booting from NAND flash. For other boot media, however, the SPAS register remains at the default setting, which may not work for the particular flash chip on the board. The same goes for flash chips whose configuration cannot be set in the IIM fuses (e.g. chips with 2k sector size and 128 bytes spare area size can't be configured in the IIM fuses on imx25 systems). Set the SPAS register explicitly during the preset operation. Derive the register value from mtd->oobsize that was detected during probe by decoding the flash chip's ID bytes. While at it, rename the define for the spare area register's offset to NFC_V21_RSLTSPARE_AREA. The register at offset 0x10 on v1 controllers is different from the register on v21 controllers. Fixes: d484018 ("mtd: mxc_nand: set NFC registers after reset") Cc: stable@vger.kernel.org Signed-off-by: Martin Kaiser Reviewed-by: Sascha Hauer Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/mxc_nand.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 53e5e0337c3e..fcb575d55b89 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -48,7 +48,7 @@ #define NFC_V1_V2_CONFIG (host->regs + 0x0a) #define NFC_V1_V2_ECC_STATUS_RESULT (host->regs + 0x0c) #define NFC_V1_V2_RSLTMAIN_AREA (host->regs + 0x0e) -#define NFC_V1_V2_RSLTSPARE_AREA (host->regs + 0x10) +#define NFC_V21_RSLTSPARE_AREA (host->regs + 0x10) #define NFC_V1_V2_WRPROT (host->regs + 0x12) #define NFC_V1_UNLOCKSTART_BLKADDR (host->regs + 0x14) #define NFC_V1_UNLOCKEND_BLKADDR (host->regs + 0x16) @@ -1119,6 +1119,9 @@ static void preset_v2(struct mtd_info *mtd) writew(config1, NFC_V1_V2_CONFIG1); /* preset operation */ + /* spare area size in 16-bit half-words */ + writew(mtd->oobsize / 2, NFC_V21_RSLTSPARE_AREA); + /* Unlock the internal RAM Buffer */ writew(0x2, NFC_V1_V2_CONFIG); -- GitLab From a19385766b4faa1e27aff8b677e2be6a0f03afe4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 May 2018 13:03:45 -0700 Subject: [PATCH 0104/1291] fs: allow per-device dax status checking for filesystems commit ba23cba9b3bdc967aabdc6ff1e3e9b11ce05bb4f upstream. Change bdev_dax_supported so it takes a bdev parameter. This enables multi-device filesystems like xfs to check that a dax device can work for the particular filesystem. Once that's in place, actually fix all the parts of XFS where we need to be able to distinguish between datadev and rtdev. This patch fixes the problem where we screw up the dax support checking in xfs if the datadev and rtdev have different dax capabilities. Signed-off-by: Darrick J. Wong [rez: Re-added __bdev_dax_supported() for !CONFIG_FS_DAX cases] Signed-off-by: Ross Zwisler Reviewed-by: Eric Sandeen Signed-off-by: Greg Kroah-Hartman --- drivers/dax/super.c | 22 +++++++++++----------- fs/ext2/super.c | 2 +- fs/ext4/super.c | 2 +- fs/xfs/xfs_ioctl.c | 3 ++- fs/xfs/xfs_iops.c | 30 +++++++++++++++++++++++++----- fs/xfs/xfs_super.c | 10 ++++++++-- include/linux/dax.h | 9 +++++---- 7 files changed, 53 insertions(+), 25 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index c4cd034a3820..8d54021cd2f6 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); /** * __bdev_dax_supported() - Check if the device supports dax for filesystem - * @sb: The superblock of the device + * @bdev: block device to check * @blocksize: The block size of the device * * This is a library function for filesystems to check if the block device @@ -81,33 +81,33 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); * * Return: negative errno if unsupported, 0 if supported. */ -int __bdev_dax_supported(struct super_block *sb, int blocksize) +int __bdev_dax_supported(struct block_device *bdev, int blocksize) { - struct block_device *bdev = sb->s_bdev; struct dax_device *dax_dev; pgoff_t pgoff; int err, id; void *kaddr; pfn_t pfn; long len; + char buf[BDEVNAME_SIZE]; if (blocksize != PAGE_SIZE) { - pr_err("VFS (%s): error: unsupported blocksize for dax\n", - sb->s_id); + pr_debug("%s: error: unsupported blocksize for dax\n", + bdevname(bdev, buf)); return -EINVAL; } err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); if (err) { - pr_err("VFS (%s): error: unaligned partition for dax\n", - sb->s_id); + pr_debug("%s: error: unaligned partition for dax\n", + bdevname(bdev, buf)); return err; } dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); if (!dax_dev) { - pr_err("VFS (%s): error: device does not support dax\n", - sb->s_id); + pr_debug("%s: error: device does not support dax\n", + bdevname(bdev, buf)); return -EOPNOTSUPP; } @@ -118,8 +118,8 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) put_dax(dax_dev); if (len < 1) { - pr_err("VFS (%s): error: dax access failed (%ld)", - sb->s_id, len); + pr_debug("%s: error: dax access failed (%ld)\n", + bdevname(bdev, buf), len); return len < 0 ? len : -EIO; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 1458706bd2ec..e85c57eb2734 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -953,7 +953,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { - err = bdev_dax_supported(sb, blocksize); + err = bdev_dax_supported(sb->s_bdev, blocksize); if (err) goto failed_mount; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 83ba37be4702..3289ec862192 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3770,7 +3770,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) " that may contain inline data"); goto failed_mount; } - err = bdev_dax_supported(sb, blocksize); + err = bdev_dax_supported(sb->s_bdev, blocksize); if (err) goto failed_mount; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index aa75389be8cf..682b6946d06a 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1101,7 +1101,8 @@ xfs_ioctl_setattr_dax_invalidate( if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; - if (bdev_dax_supported(sb, sb->s_blocksize) < 0) + if (bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), + sb->s_blocksize) < 0) return -EINVAL; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f24e5b6cfc86..1daa965f1e08 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1184,6 +1184,30 @@ static const struct inode_operations xfs_inline_symlink_inode_operations = { .update_time = xfs_vn_update_time, }; +/* Figure out if this file actually supports DAX. */ +static bool +xfs_inode_supports_dax( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + /* Only supported on non-reflinked files. */ + if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip)) + return false; + + /* DAX mount option or DAX iflag must be set. */ + if (!(mp->m_flags & XFS_MOUNT_DAX) && + !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) + return false; + + /* Block size must match page size */ + if (mp->m_sb.sb_blocksize != PAGE_SIZE) + return false; + + /* Device has to support DAX too. */ + return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL; +} + STATIC void xfs_diflags_to_iflags( struct inode *inode, @@ -1202,11 +1226,7 @@ xfs_diflags_to_iflags( inode->i_flags |= S_SYNC; if (flags & XFS_DIFLAG_NOATIME) inode->i_flags |= S_NOATIME; - if (S_ISREG(inode->i_mode) && - ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE && - !xfs_is_reflink_inode(ip) && - (ip->i_mount->m_flags & XFS_MOUNT_DAX || - ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) + if (xfs_inode_supports_dax(ip)) inode->i_flags |= S_DAX; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..89f3e54e57f5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1640,11 +1640,17 @@ xfs_fs_fill_super( sb->s_flags |= SB_I_VERSION; if (mp->m_flags & XFS_MOUNT_DAX) { + int error2 = 0; + xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - error = bdev_dax_supported(sb, sb->s_blocksize); - if (error) { + error = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, + sb->s_blocksize); + if (mp->m_rtdev_targp) + error2 = bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, + sb->s_blocksize); + if (error && error2) { xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); mp->m_flags &= ~XFS_MOUNT_DAX; diff --git a/include/linux/dax.h b/include/linux/dax.h index 895e16fcc62d..7b37727ad3ed 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -40,10 +40,10 @@ static inline void put_dax(struct dax_device *dax_dev) int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) -int __bdev_dax_supported(struct super_block *sb, int blocksize); -static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +int __bdev_dax_supported(struct block_device *bdev, int blocksize); +static inline int bdev_dax_supported(struct block_device *bdev, int blocksize) { - return __bdev_dax_supported(sb, blocksize); + return __bdev_dax_supported(bdev, blocksize); } static inline struct dax_device *fs_dax_get_by_host(const char *host) @@ -58,7 +58,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); #else -static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +static inline int bdev_dax_supported(struct block_device *bdev, + int blocksize) { return -EOPNOTSUPP; } -- GitLab From 8214347c260b0839c007ec4882e180fca1b6a077 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 May 2018 13:03:46 -0700 Subject: [PATCH 0105/1291] dax: change bdev_dax_supported() to support boolean returns commit 80660f20252d6f76c9f203874ad7c7a4a8508cf8 upstream. The function return values are confusing with the way the function is named. We expect a true or false return value but it actually returns 0/-errno. This makes the code very confusing. Changing the return values to return a bool where if DAX is supported then return true and no DAX support returns false. Signed-off-by: Dave Jiang Signed-off-by: Ross Zwisler Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- drivers/dax/super.c | 14 +++++++------- fs/ext2/super.c | 3 +-- fs/ext4/super.c | 3 +-- fs/xfs/xfs_ioctl.c | 4 ++-- fs/xfs/xfs_super.c | 12 ++++++------ include/linux/dax.h | 8 ++++---- 6 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8d54021cd2f6..b9c3e8cd0820 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -79,9 +79,9 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); * This is a library function for filesystems to check if the block device * can be mounted with dax option. * - * Return: negative errno if unsupported, 0 if supported. + * Return: true if supported, false if unsupported */ -int __bdev_dax_supported(struct block_device *bdev, int blocksize) +bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { struct dax_device *dax_dev; pgoff_t pgoff; @@ -94,21 +94,21 @@ int __bdev_dax_supported(struct block_device *bdev, int blocksize) if (blocksize != PAGE_SIZE) { pr_debug("%s: error: unsupported blocksize for dax\n", bdevname(bdev, buf)); - return -EINVAL; + return false; } err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); if (err) { pr_debug("%s: error: unaligned partition for dax\n", bdevname(bdev, buf)); - return err; + return false; } dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); if (!dax_dev) { pr_debug("%s: error: device does not support dax\n", bdevname(bdev, buf)); - return -EOPNOTSUPP; + return false; } id = dax_read_lock(); @@ -120,10 +120,10 @@ int __bdev_dax_supported(struct block_device *bdev, int blocksize) if (len < 1) { pr_debug("%s: error: dax access failed (%ld)\n", bdevname(bdev, buf), len); - return len < 0 ? len : -EIO; + return false; } - return 0; + return true; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); #endif diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e85c57eb2734..726e680a3368 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -953,8 +953,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { - err = bdev_dax_supported(sb->s_bdev, blocksize); - if (err) + if (!bdev_dax_supported(sb->s_bdev, blocksize)) goto failed_mount; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3289ec862192..fc32a67a7a19 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3770,8 +3770,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) " that may contain inline data"); goto failed_mount; } - err = bdev_dax_supported(sb->s_bdev, blocksize); - if (err) + if (!bdev_dax_supported(sb->s_bdev, blocksize)) goto failed_mount; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 682b6946d06a..79a9a0def7db 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1101,8 +1101,8 @@ xfs_ioctl_setattr_dax_invalidate( if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; - if (bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), - sb->s_blocksize) < 0) + if (!bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), + sb->s_blocksize)) return -EINVAL; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 89f3e54e57f5..0b0282d2f011 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1640,17 +1640,17 @@ xfs_fs_fill_super( sb->s_flags |= SB_I_VERSION; if (mp->m_flags & XFS_MOUNT_DAX) { - int error2 = 0; + bool rtdev_is_dax = false, datadev_is_dax; xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - error = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, - sb->s_blocksize); + datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, + sb->s_blocksize); if (mp->m_rtdev_targp) - error2 = bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, - sb->s_blocksize); - if (error && error2) { + rtdev_is_dax = bdev_dax_supported( + mp->m_rtdev_targp->bt_bdev, sb->s_blocksize); + if (!rtdev_is_dax && !datadev_is_dax) { xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); mp->m_flags &= ~XFS_MOUNT_DAX; diff --git a/include/linux/dax.h b/include/linux/dax.h index 7b37727ad3ed..07d6bc1f90a3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -40,8 +40,8 @@ static inline void put_dax(struct dax_device *dax_dev) int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) -int __bdev_dax_supported(struct block_device *bdev, int blocksize); -static inline int bdev_dax_supported(struct block_device *bdev, int blocksize) +bool __bdev_dax_supported(struct block_device *bdev, int blocksize); +static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) { return __bdev_dax_supported(bdev, blocksize); } @@ -58,10 +58,10 @@ static inline void fs_put_dax(struct dax_device *dax_dev) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); #else -static inline int bdev_dax_supported(struct block_device *bdev, +static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) { - return -EOPNOTSUPP; + return false; } static inline struct dax_device *fs_dax_get_by_host(const char *host) -- GitLab From 3729e5561e1e4a0319a2c794d8ae78b8baea4a36 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 26 Jun 2018 16:30:40 -0600 Subject: [PATCH 0106/1291] dax: check for QUEUE_FLAG_DAX in bdev_dax_supported() commit 15256f6cc4b44f2e70503758150267fd2a53c0d6 upstream. Add an explicit check for QUEUE_FLAG_DAX to __bdev_dax_supported(). This is needed for DM configurations where the first element in the dm-linear or dm-stripe target supports DAX, but other elements do not. Without this check __bdev_dax_supported() will pass for such devices, letting a filesystem on that device mount with the DAX option. Signed-off-by: Ross Zwisler Suggested-by: Mike Snitzer Fixes: commit 545ed20e6df6 ("dm: add infrastructure for DAX support") Cc: stable@vger.kernel.org Acked-by: Dan Williams Reviewed-by: Toshi Kani Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/dax/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index b9c3e8cd0820..6c179c2a9ff9 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { struct dax_device *dax_dev; + struct request_queue *q; pgoff_t pgoff; int err, id; void *kaddr; @@ -97,6 +98,13 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } + q = bdev_get_queue(bdev); + if (!q || !blk_queue_dax(q)) { + pr_debug("%s: error: request queue doesn't support dax\n", + bdevname(bdev, buf)); + return false; + } + err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); if (err) { pr_debug("%s: error: unaligned partition for dax\n", -- GitLab From 0605fa6daa6642c276ef037b3246595e4de53491 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 4 Dec 2017 23:28:32 -0500 Subject: [PATCH 0107/1291] dm: set QUEUE_FLAG_DAX accordingly in dm_table_set_restrictions() commit ad3793fc3945173f64d82d05d3ecde41f6c0435c upstream. Rather than having DAX support be unique by setting it based on table type in dm_setup_md_queue(). Signed-off-by: Mike Snitzer Signed-off-by: Ross Zwisler Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 2 ++ drivers/md/dm.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4287fc9f3527..94849435c487 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1813,6 +1813,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); + if (dm_table_supports_dax(t)) + queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1dfc855ac708..b721f0a16711 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2050,9 +2050,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) */ bioset_free(md->queue->bio_split); md->queue->bio_split = NULL; - - if (type == DM_TYPE_DAX_BIO_BASED) - queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); -- GitLab From 74ec37d03a12b001120c81fecfcb878e5e9d4382 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 26 Jun 2018 16:30:41 -0600 Subject: [PATCH 0108/1291] dm: prevent DAX mounts if not supported commit dbc626597c39b24cefce09fbd8e9dea85869a801 upstream. Currently device_supports_dax() just checks to see if the QUEUE_FLAG_DAX flag is set on the device's request queue to decide whether or not the device supports filesystem DAX. Really we should be using bdev_dax_supported() like filesystems do at mount time. This performs other tests like checking to make sure the dax_direct_access() path works. We also explicitly clear QUEUE_FLAG_DAX on the DM device's request queue if any of the underlying devices do not support DAX. This makes the handling of QUEUE_FLAG_DAX consistent with the setting/clearing of most other flags in dm_table_set_restrictions(). Now that bdev_dax_supported() explicitly checks for QUEUE_FLAG_DAX, this will ensure that filesystems built upon DM devices will only be able to mount with DAX if all underlying devices also support DAX. Signed-off-by: Ross Zwisler Fixes: commit 545ed20e6df6 ("dm: add infrastructure for DAX support") Cc: stable@vger.kernel.org Acked-by: Dan Williams Reviewed-by: Toshi Kani Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 7 ++++--- drivers/md/dm.c | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 94849435c487..f9cd81375f28 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -883,9 +883,7 @@ EXPORT_SYMBOL_GPL(dm_table_set_type); static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && blk_queue_dax(q); + return bdev_dax_supported(dev->bdev, PAGE_SIZE); } static bool dm_table_supports_dax(struct dm_table *t) @@ -1815,6 +1813,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_dax(t)) queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); + else + queue_flag_clear_unlocked(QUEUE_FLAG_DAX, q); + if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b721f0a16711..24ec6e039448 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -961,8 +961,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (len < 1) goto out; nr_pages = min(len, nr_pages); - if (ti->type->direct_access) - ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); + ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); out: dm_put_live_table(md, srcu_idx); -- GitLab From eb638a002274b5a73b2be94e1553ac96e0b4f3fd Mon Sep 17 00:00:00 2001 From: Tokunori Ikegami Date: Wed, 30 May 2018 18:32:27 +0900 Subject: [PATCH 0109/1291] mtd: cfi_cmdset_0002: Change definition naming to retry write operation commit 85a82e28b023de9b259a86824afbd6ba07bd6475 upstream. The definition can be used for other program and erase operations also. So change the naming to MAX_RETRIES from MAX_WORD_RETRIES. Signed-off-by: Tokunori Ikegami Reviewed-by: Joakim Tjernlund Cc: Chris Packham Cc: Brian Norris Cc: David Woodhouse Cc: Boris Brezillon Cc: Marek Vasut Cc: Richard Weinberger Cc: Cyrille Pitchen Cc: linux-mtd@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index ac76c10c042f..36875bc7af78 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -42,7 +42,7 @@ #define AMD_BOOTLOC_BUG #define FORCE_WORD_WRITE 0 -#define MAX_WORD_RETRIES 3 +#define MAX_RETRIES 3 #define SST49LF004B 0x0060 #define SST49LF040B 0x0050 @@ -1647,7 +1647,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, map_write( map, CMD(0xF0), chip->start ); /* FIXME - should have reset delay before continuing */ - if (++retry_cnt <= MAX_WORD_RETRIES) + if (++retry_cnt <= MAX_RETRIES) goto retry; ret = -EIO; @@ -2106,7 +2106,7 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip, map_write(map, CMD(0xF0), chip->start); /* FIXME - should have reset delay before continuing */ - if (++retry_cnt <= MAX_WORD_RETRIES) + if (++retry_cnt <= MAX_RETRIES) goto retry; ret = -EIO; -- GitLab From b76d8aa04240a07b1bc55759a3aa75556c106eed Mon Sep 17 00:00:00 2001 From: Tokunori Ikegami Date: Wed, 30 May 2018 18:32:28 +0900 Subject: [PATCH 0110/1291] mtd: cfi_cmdset_0002: Change erase functions to retry for error commit 45f75b8a919a4255f52df454f1ffdee0e42443b2 upstream. For the word write functions it is retried for error. But it is not implemented to retry for the erase functions. To make sure for the erase functions change to retry as same. This is needed to prevent the flash erase error caused only once. It was caused by the error case of chip_good() in the do_erase_oneblock(). Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G. But the error issue behavior is not able to reproduce at this moment. The flash controller is parallel Flash interface integrated on BCM53003. Signed-off-by: Tokunori Ikegami Reviewed-by: Joakim Tjernlund Cc: Chris Packham Cc: Brian Norris Cc: David Woodhouse Cc: Boris Brezillon Cc: Marek Vasut Cc: Richard Weinberger Cc: Cyrille Pitchen Cc: linux-mtd@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0002.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 36875bc7af78..729c1b8a57b2 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2241,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) unsigned long int adr; DECLARE_WAITQUEUE(wait, current); int ret = 0; + int retry_cnt = 0; adr = cfi->addr_unlock1; @@ -2258,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) ENABLE_VPP(map); xip_disable(map, chip, adr); + retry: cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); @@ -2312,6 +2314,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) map_write( map, CMD(0xF0), chip->start ); /* FIXME - should have reset delay before continuing */ + if (++retry_cnt <= MAX_RETRIES) + goto retry; + ret = -EIO; } @@ -2331,6 +2336,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long timeo = jiffies + HZ; DECLARE_WAITQUEUE(wait, current); int ret = 0; + int retry_cnt = 0; adr += chip->start; @@ -2348,6 +2354,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, ENABLE_VPP(map); xip_disable(map, chip, adr); + retry: cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); @@ -2405,6 +2412,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, map_write( map, CMD(0xF0), chip->start ); /* FIXME - should have reset delay before continuing */ + if (++retry_cnt <= MAX_RETRIES) + goto retry; + ret = -EIO; } -- GitLab From ac6bfe418e043e20f3805e95b46194d8956f3357 Mon Sep 17 00:00:00 2001 From: Tokunori Ikegami Date: Wed, 30 May 2018 18:32:29 +0900 Subject: [PATCH 0111/1291] mtd: cfi_cmdset_0002: Change erase functions to check chip good only commit 79ca484b613041ca223f74b34608bb6f5221724b upstream. Currently the functions use to check both chip ready and good. But the chip ready is not enough to check the operation status. So change this to check the chip good instead of this. About the retry functions to make sure the error handling remain it. Signed-off-by: Tokunori Ikegami Reviewed-by: Joakim Tjernlund Cc: Chris Packham Cc: Brian Norris Cc: David Woodhouse Cc: Boris Brezillon Cc: Marek Vasut Cc: Richard Weinberger Cc: Cyrille Pitchen Cc: linux-mtd@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0002.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 729c1b8a57b2..af3d207c9cc4 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2296,12 +2296,13 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) chip->erase_suspended = 0; } - if (chip_ready(map, adr)) + if (chip_good(map, adr, map_word_ff(map))) break; if (time_after(jiffies, timeo)) { printk(KERN_WARNING "MTD %s(): software timeout\n", __func__ ); + ret = -EIO; break; } @@ -2309,15 +2310,15 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) UDELAY(map, chip, adr, 1000000/HZ); } /* Did we succeed? */ - if (!chip_good(map, adr, map_word_ff(map))) { + if (ret) { /* reset on all failures. */ map_write( map, CMD(0xF0), chip->start ); /* FIXME - should have reset delay before continuing */ - if (++retry_cnt <= MAX_RETRIES) + if (++retry_cnt <= MAX_RETRIES) { + ret = 0; goto retry; - - ret = -EIO; + } } chip->state = FL_READY; @@ -2391,7 +2392,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, chip->erase_suspended = 0; } - if (chip_ready(map, adr)) { + if (chip_good(map, adr, map_word_ff(map))) { xip_enable(map, chip, adr); break; } @@ -2400,6 +2401,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, xip_enable(map, chip, adr); printk(KERN_WARNING "MTD %s(): software timeout\n", __func__ ); + ret = -EIO; break; } @@ -2407,15 +2409,15 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, UDELAY(map, chip, adr, 1000000/HZ); } /* Did we succeed? */ - if (!chip_good(map, adr, map_word_ff(map))) { + if (ret) { /* reset on all failures. */ map_write( map, CMD(0xF0), chip->start ); /* FIXME - should have reset delay before continuing */ - if (++retry_cnt <= MAX_RETRIES) + if (++retry_cnt <= MAX_RETRIES) { + ret = 0; goto retry; - - ret = -EIO; + } } chip->state = FL_READY; -- GitLab From 6d2b458d3d140bf75899aa1ef4349d7921bb7bf5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 17:22:00 +0200 Subject: [PATCH 0112/1291] netfilter: nf_log: don't hold nf_log_mutex during user access commit ce00bf07cc95a57cd20b208e02b3c2604e532ae8 upstream. The old code would indefinitely block other users of nf_log_mutex if a userspace access in proc_dostring() blocked e.g. due to a userfaultfd region. Fix it by moving proc_dostring() out of the locked region. This is a followup to commit 266d07cb1c9a ("netfilter: nf_log: fix sleeping function called from invalid context"), which changed this code from using rcu_read_lock() to taking nf_log_mutex. Fixes: 266d07cb1c9a ("netfilter: nf_log: fix sleeping function calle[...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb152a7cca4..276324abfa60 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -458,14 +458,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { + struct ctl_table tmp = *table; + + tmp.data = buf; mutex_lock(&nf_log_mutex); logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) - table->data = "NONE"; + strlcpy(buf, "NONE", sizeof(buf)); else - table->data = logger->name; - r = proc_dostring(table, write, buffer, lenp, ppos); + strlcpy(buf, logger->name, sizeof(buf)); mutex_unlock(&nf_log_mutex); + r = proc_dostring(&tmp, write, buffer, lenp, ppos); } return r; -- GitLab From 32199c810655f5251e388abdd63e2313c71ad4d8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jun 2018 12:36:30 +0300 Subject: [PATCH 0113/1291] staging: comedi: quatech_daqp_cs: fix no-op loop daqp_ao_insn_write() commit 1376b0a2160319125c3a2822e8c09bd283cd8141 upstream. There is a '>' vs '<' typo so this loop is a no-op. Fixes: d35dcc89fc93 ("staging: comedi: quatech_daqp_cs: fix daqp_ao_insn_write()") Signed-off-by: Dan Carpenter Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/quatech_daqp_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index 802f51e46405..171960568356 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -642,7 +642,7 @@ static int daqp_ao_insn_write(struct comedi_device *dev, /* Make sure D/A update mode is direct update */ outb(0, dev->iobase + DAQP_AUX_REG); - for (i = 0; i > insn->n; i++) { + for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; int ret; -- GitLab From 7cf346dfdea552f5ca71b85cf7389d347269b40c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 24 May 2018 15:26:48 +0200 Subject: [PATCH 0114/1291] sched, tracing: Fix trace_sched_pi_setprio() for deboosting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ff648decf4712d39f184fc2df3163f43975575a upstream. Since the following commit: b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()") the sched_pi_setprio trace point shows the "newprio" during a deboost: |futex sched_pi_setprio: comm=futex_requeue_p pid"34 oldprio˜ newprio=3D98 |futex sched_switch: prev_comm=futex_requeue_p prev_pid"34 prev_prio=120 This patch open codes __rt_effective_prio() in the tracepoint as the 'newprio' to get the old behaviour back / the correct priority: |futex sched_pi_setprio: comm=futex_requeue_p pid"20 oldprio˜ newprio=3D120 |futex sched_switch: prev_comm=futex_requeue_p prev_pid"20 prev_prio=120 Peter suggested to open code the new priority so people using tracehook could get the deadline data out. Reported-by: Mansky Christian Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()") Link: http://lkml.kernel.org/r/20180524132647.gg6ziuogczdmjjzu@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index da10aa21bebc..d447f24df970 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = pi_task ? pi_task->prio : tsk->prio; + __entry->newprio = pi_task ? + min(tsk->normal_prio, pi_task->prio) : + tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), -- GitLab From b3ef356a096e218244b24e701abca72fc08e0e07 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 Jun 2018 23:26:05 -0700 Subject: [PATCH 0115/1291] Revert mm/vmstat.c: fix vmstat_update() preemption BUG commit 28557cc106e6d2aa8b8c5c7687ea9f8055ff3911 upstream. Revert commit c7f26ccfb2c3 ("mm/vmstat.c: fix vmstat_update() preemption BUG"). Steven saw a "using smp_processor_id() in preemptible" message and added a preempt_disable() section around it to keep it quiet. This is not the right thing to do it does not fix the real problem. vmstat_update() is invoked by a kworker on a specific CPU. This worker it bound to this CPU. The name of the worker was "kworker/1:1" so it should have been a worker which was bound to CPU1. A worker which can run on any CPU would have a `u' before the first digit. smp_processor_id() can be used in a preempt-enabled region as long as the task is bound to a single CPU which is the case here. If it could run on an arbitrary CPU then this is the problem we have an should seek to resolve. Not only this smp_processor_id() must not be migrated to another CPU but also refresh_cpu_vm_stats() which might access wrong per-CPU variables. Not to mention that other code relies on the fact that such a worker runs on one specific CPU only. Therefore revert that commit and we should look instead what broke the affinity mask of the kworker. Link: http://lkml.kernel.org/r/20180504104451.20278-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Steven J. Hill Cc: Tejun Heo Cc: Vlastimil Babka Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index e085b13c572e..4bb13e72ac97 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1770,11 +1770,9 @@ static void vmstat_update(struct work_struct *w) * to occur in the future. Keep on running the * update worker thread. */ - preempt_disable(); queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); - preempt_enable(); } } -- GitLab From 1e92e813554a93741666e9f378a83d70405b9076 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jul 2018 16:29:25 +0200 Subject: [PATCH 0116/1291] Linux 4.14.55 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index de0955d8dfa3..0700feaaa6cf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 54 +SUBLEVEL = 55 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 99ebaf4f43dd25e18baf02c994c7939f1257f8a3 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 8 Apr 2018 06:36:40 -0400 Subject: [PATCH 0117/1291] media: rc: mce_kbd decoder: fix stuck keys commit 63039c29f7a4ce8a8bd165173840543c0098d7b0 upstream. The MCE Remote sends a 0 scancode when keys are released. If this is not received or decoded, then keys can get "stuck"; the keyup event is not sent since the input_sync() is missing from the timeout handler. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ir-mce_kbd-decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ir-mce_kbd-decoder.c b/drivers/media/rc/ir-mce_kbd-decoder.c index 7c572a643656..2a1728edb3c6 100644 --- a/drivers/media/rc/ir-mce_kbd-decoder.c +++ b/drivers/media/rc/ir-mce_kbd-decoder.c @@ -130,6 +130,8 @@ static void mce_kbd_rx_timeout(unsigned long data) for (i = 0; i < MCIR2_MASK_KEYS_START; i++) input_report_key(mce_kbd->idev, kbd_keycodes[i], 0); + + input_sync(mce_kbd->idev); } static enum mce_kbd_mode mce_kbd_mode(struct mce_kbd_dec *data) -- GitLab From 77f738e84293f10980ea94cbcf57a8f204295f78 Mon Sep 17 00:00:00 2001 From: Kai Chieh Chuang Date: Fri, 27 Apr 2018 10:11:35 +0800 Subject: [PATCH 0118/1291] ASoC: mediatek: preallocate pages use platform device commit 5845e6155d8f4a4a9bae2d4c1d1bb4a4d9a925c2 upstream. preallocate pages should use platform device, since we set dma mask for platform device. Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/mediatek/common/mtk-afe-platform-driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/common/mtk-afe-platform-driver.c b/sound/soc/mediatek/common/mtk-afe-platform-driver.c index 82d439c15f4e..e58fbefa5e6b 100644 --- a/sound/soc/mediatek/common/mtk-afe-platform-driver.c +++ b/sound/soc/mediatek/common/mtk-afe-platform-driver.c @@ -63,13 +63,13 @@ static const struct snd_pcm_ops mtk_afe_pcm_ops = { static int mtk_afe_pcm_new(struct snd_soc_pcm_runtime *rtd) { size_t size; - struct snd_card *card = rtd->card->snd_card; struct snd_pcm *pcm = rtd->pcm; struct mtk_base_afe *afe = snd_soc_platform_get_drvdata(rtd->platform); size = afe->mtk_afe_hardware->buffer_bytes_max; return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, - card->dev, size, size); + rtd->platform->dev, + size, size); } static void mtk_afe_pcm_free(struct snd_pcm *pcm) -- GitLab From 14ab9902422c96cf2ab4db289edaf10ee605059c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:45 -0700 Subject: [PATCH 0119/1291] MIPS: Call dump_stack() from show_regs() commit 5a267832c2ec47b2dad0fdb291a96bb5b8869315 upstream. The generic nmi_cpu_backtrace() function calls show_regs() when a struct pt_regs is available, and dump_stack() otherwise. If we were to make use of the generic nmi_cpu_backtrace() with MIPS' current implementation of show_regs() this would mean that we see only register data with no accompanying stack information, in contrast with our current implementation which calls dump_stack() regardless of whether register state is available. In preparation for making use of the generic nmi_cpu_backtrace() to implement arch_trigger_cpumask_backtrace(), have our implementation of show_regs() call dump_stack() and drop the explicit dump_stack() call in arch_dump_stack() which is invoked by arch_trigger_cpumask_backtrace(). This will allow the output we produce to remain the same after a later patch switches to using nmi_cpu_backtrace(). It may mean that we produce extra stack output in other uses of show_regs(), but this: 1) Seems harmless. 2) Is good for consistency between arch_trigger_cpumask_backtrace() and other users of show_regs(). 3) Matches the behaviour of the ARM & PowerPC architectures. Marked for stable back to v4.9 as a prerequisite of the following patch "MIPS: Call dump_stack() from show_regs()". Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19596/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 4 ++-- arch/mips/kernel/traps.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index e1ddb94a6522..4d2a2b093553 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -663,8 +663,8 @@ static void arch_dump_stack(void *info) if (regs) show_regs(regs); - - dump_stack(); + else + dump_stack(); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5669d3b8bd38..583aed906933 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs) void show_regs(struct pt_regs *regs) { __show_regs((struct pt_regs *)regs); + dump_stack(); } void show_registers(struct pt_regs *regs) -- GitLab From 0818c44b8df72205298208b21a14c9e3959bb3b3 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:46 -0700 Subject: [PATCH 0120/1291] MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [] exit_mmap+0x76/0xea [42760.890428] [] mmput+0x80/0x11a [42760.896632] [] do_exit+0x1f4/0x80c [42760.903158] [] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 45 +++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 4d2a2b093553..e8d772a2597d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -655,28 +656,42 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ALMASK; } -static void arch_dump_stack(void *info) -{ - struct pt_regs *regs; +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); +static struct cpumask backtrace_csd_busy; - regs = get_irq_regs(); - - if (regs) - show_regs(regs); - else - dump_stack(); +static void handle_backtrace(void *info) +{ + nmi_cpu_backtrace(get_irq_regs()); + cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +static void raise_backtrace(cpumask_t *mask) { - long this_cpu = get_cpu(); + call_single_data_t *csd; + int cpu; - if (cpumask_test_cpu(this_cpu, mask) && !exclude_self) - dump_stack(); + for_each_cpu(cpu, mask) { + /* + * If we previously sent an IPI to the target CPU & it hasn't + * cleared its bit in the busy cpumask then it didn't handle + * our previous IPI & it's not safe for us to reuse the + * call_single_data_t. + */ + if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) { + pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n", + cpu); + continue; + } - smp_call_function_many(mask, arch_dump_stack, NULL, 1); + csd = &per_cpu(backtrace_csd, cpu); + csd->func = handle_backtrace; + smp_call_function_single_async(cpu, csd); + } +} - put_cpu(); +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); } int mips_get_process_fp_mode(struct task_struct *task) -- GitLab From 6fce06b68b44396cedc396b63728e100c89dbdd6 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 5 Jul 2018 14:37:52 -0700 Subject: [PATCH 0121/1291] MIPS: Fix ioremap() RAM check commit 523402fa9101090c91d2033b7ebdfdcf65880488 upstream. We currently attempt to check whether a physical address range provided to __ioremap() may be in use by the page allocator by examining the value of PageReserved for each page in the region - lowmem pages not marked reserved are presumed to be in use by the page allocator, and requests to ioremap them fail. The way we check this has been broken since commit 92923ca3aace ("mm: meminit: only set page reserved in the memblock region"), because memblock will typically not have any knowledge of non-RAM pages and therefore those pages will not have the PageReserved flag set. Thus when we attempt to ioremap a region outside of RAM we incorrectly fail believing that the region is RAM that may be in use. In most cases ioremap() on MIPS will take a fast-path to use the unmapped kseg1 or xkphys virtual address spaces and never hit this path, so the only way to hit it is for a MIPS32 system to attempt to ioremap() an address range in lowmem with flags other than _CACHE_UNCACHED. Perhaps the most straightforward way to do this is using ioremap_uncached_accelerated(), which is how the problem was discovered. Fix this by making use of walk_system_ram_range() to test the address range provided to __ioremap() against only RAM pages, rather than all lowmem pages. This means that if we have a lowmem I/O region, which is very common for MIPS systems, we're free to ioremap() address ranges within it. A nice bonus is that the test is no longer limited to lowmem. The approach here matches the way x86 performed the same test after commit c81c8a1eeede ("x86, ioremap: Speed up check for RAM pages") until x86 moved towards a slightly more complicated check using walk_mem_res() for unrelated reasons with commit 0e4c12b45aa8 ("x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages"). Signed-off-by: Paul Burton Reported-by: Serge Semin Tested-by: Serge Semin Fixes: 92923ca3aace ("mm: meminit: only set page reserved in the memblock region") Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.2+ Patchwork: https://patchwork.linux-mips.org/patch/19786/ Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/ioremap.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1986e09fb457..1601d90b087b 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, return error; } +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; i++) { + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + } + + return 0; +} + /* * Generic mapping function (not visible outside): */ @@ -116,8 +131,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags) { + unsigned long offset, pfn, last_pfn; struct vm_struct * area; - unsigned long offset; phys_addr_t last_addr; void * addr; @@ -137,18 +152,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long return (void __iomem *) CKSEG1ADDR(phys_addr); /* - * Don't allow anybody to remap normal RAM that we're using.. + * Don't allow anybody to remap RAM that may be allocated by the page + * allocator, since that could lead to races & data clobbering. */ - if (phys_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; + pfn = PFN_DOWN(phys_addr); + last_pfn = PFN_DOWN(last_addr); + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; } /* -- GitLab From 2c9fa8ff7045bb8604c6e31ead62f9e026a36a1a Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 4 Jul 2018 17:07:45 +0200 Subject: [PATCH 0122/1291] mmc: sdhci-esdhc-imx: allow 1.8V modes without 100/200MHz pinctrl states commit 92748beac07c471d995fbec642b63572dc01b3dc upstream. If pinctrl nodes for 100/200MHz are missing, the controller should not select any mode which need signal frequencies 100MHz or higher. To prevent such speed modes the driver currently uses the quirk flag SDHCI_QUIRK2_NO_1_8_V. This works nicely for SD cards since 1.8V signaling is required for all faster modes and slower modes use 3.3V signaling only. However, there are eMMC modes which use 1.8V signaling and run below 100MHz, e.g. DDR52 at 1.8V. With using SDHCI_QUIRK2_NO_1_8_V this mode is prevented. When using a fixed 1.8V regulator as vqmmc-supply the stack has no valid mode to use. In this tenuous situation the kernel continuously prints voltage switching errors: mmc1: Switching to 3.3V signalling voltage failed Avoid using SDHCI_QUIRK2_NO_1_8_V and prevent faster modes by altering the SDHCI capability register. With that the stack is able to select 1.8V modes even if no faster pinctrl states are available: # cat /sys/kernel/debug/mmc1/ios ... timing spec: 8 (mmc DDR52) signal voltage: 1 (1.80 V) ... Link: http://lkml.kernel.org/r/20180628081331.13051-1-stefan@agner.ch Signed-off-by: Stefan Agner Fixes: ad93220de7da ("mmc: sdhci-esdhc-imx: change pinctrl state according to uhs mode") Cc: # v4.13+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 8b941f814472..c81de2f25281 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -305,6 +305,15 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg) if (imx_data->socdata->flags & ESDHC_FLAG_HS400) val |= SDHCI_SUPPORT_HS400; + + /* + * Do not advertise faster UHS modes if there are no + * pinctrl states for 100MHz/200MHz. + */ + if (IS_ERR_OR_NULL(imx_data->pins_100mhz) || + IS_ERR_OR_NULL(imx_data->pins_200mhz)) + val &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50 + | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_HS400); } } @@ -1135,18 +1144,6 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, ESDHC_PINCTRL_STATE_100MHZ); imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl, ESDHC_PINCTRL_STATE_200MHZ); - if (IS_ERR(imx_data->pins_100mhz) || - IS_ERR(imx_data->pins_200mhz)) { - dev_warn(mmc_dev(host->mmc), - "could not get ultra high speed state, work on normal mode\n"); - /* - * fall back to not supporting uhs by specifying no - * 1.8v quirk - */ - host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; - } - } else { - host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; } /* call to generic mmc_of_parse to support additional capabilities */ -- GitLab From ccb242ec502013f49c8c6117037e766c57c3a06b Mon Sep 17 00:00:00 2001 From: x00270170 Date: Tue, 3 Jul 2018 15:06:27 +0800 Subject: [PATCH 0123/1291] mmc: dw_mmc: fix card threshold control configuration commit 7a6b9f4d601dfce8cb68f0dcfd834270280e31e6 upstream. Card write threshold control is supposed to be set since controller version 2.80a for data write in HS400 mode and data read in HS200/HS400/SDR104 mode. However the current code returns without configuring it in the case of data writing in HS400 mode. Meanwhile the patch fixes that the current code goes to 'disable' when doing data reading in HS400 mode. Fixes: 7e4bf1bc9543 ("mmc: dw_mmc: add the card write threshold for HS400 mode") Signed-off-by: Qing Xia Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index de31e20dc56c..6a2cbbba29aa 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1089,8 +1089,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data) * It's used when HS400 mode is enabled. */ if (data->flags & MMC_DATA_WRITE && - !(host->timing != MMC_TIMING_MMC_HS400)) - return; + host->timing != MMC_TIMING_MMC_HS400) + goto disable; if (data->flags & MMC_DATA_WRITE) enable = SDMMC_CARD_WR_THR_EN; @@ -1098,7 +1098,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data) enable = SDMMC_CARD_RD_THR_EN; if (host->timing != MMC_TIMING_MMC_HS200 && - host->timing != MMC_TIMING_UHS_SDR104) + host->timing != MMC_TIMING_UHS_SDR104 && + host->timing != MMC_TIMING_MMC_HS400) goto disable; blksz_depth = blksz / (1 << host->data_shift); -- GitLab From 908bfe10dd14e8c4d858bed3e37c4748819f1bf6 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 7 Jul 2018 04:16:33 +0200 Subject: [PATCH 0124/1291] ibmasm: don't write out of bounds in read handler commit a0341fc1981a950c1e902ab901e98f60e0e243f3 upstream. This read handler had a lot of custom logic and wrote outside the bounds of the provided buffer. This could lead to kernel and userspace memory corruption. Just use simple_read_from_buffer() with a stack buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmasm/ibmasmfs.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index e05c3245930a..fa840666bdd1 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -507,35 +507,14 @@ static int remote_settings_file_close(struct inode *inode, struct file *file) static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { void __iomem *address = (void __iomem *)file->private_data; - unsigned char *page; - int retval; int len = 0; unsigned int value; - - if (*offset < 0) - return -EINVAL; - if (count == 0 || count > 1024) - return 0; - if (*offset != 0) - return 0; - - page = (unsigned char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; + char lbuf[20]; value = readl(address); - len = sprintf(page, "%d\n", value); - - if (copy_to_user(buf, page, len)) { - retval = -EFAULT; - goto exit; - } - *offset += len; - retval = len; + len = snprintf(lbuf, sizeof(lbuf), "%d\n", value); -exit: - free_page((unsigned long)page); - return retval; + return simple_read_from_buffer(buf, count, offset, lbuf, len); } static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) -- GitLab From e5bb39faedd770f6fc56f5a98f72c4e4304aa294 Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 2 Jul 2018 13:07:28 +1200 Subject: [PATCH 0125/1291] staging: rtl8723bs: Prevent an underflow in rtw_check_beacon_data(). commit 920c92448839bd4f8eb87a92b08cad56d449caff upstream. Dan Carpenter reported an integer underflow issue in the rtl8188eu driver. This is also needed for the length (signed integer) in rtl8723bs, as it is later converted to an unsigned integer and used in a memcpy operation. Original issue is at https://patchwork.kernel.org/patch/9796371/ Reported-by: Dan Carpenter Signed-off-by: Murray McAllister Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c index d3007c1c45e3..a84400f07a38 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ap.c +++ b/drivers/staging/rtl8723bs/core/rtw_ap.c @@ -1059,7 +1059,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) return _FAIL; - if (len > MAX_IE_SZ) + if (len < 0 || len > MAX_IE_SZ) return _FAIL; pbss_network->IELength = len; -- GitLab From a3bb42c1bc6c1d83abb645afbc28ba7f25a97c38 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 6 Jul 2018 13:44:35 +0800 Subject: [PATCH 0126/1291] staging: r8822be: Fix RTL8822be can't find any wireless AP commit d59d2f9995d28974877750f429e821324bd603c7 upstream. RTL8822be can't bring up properly on ASUS X530UN, and dmesg says: [ 8.591333] r8822be: module is from the staging directory, the quality is unknown, you have been warned. [ 8.593122] r8822be 0000:02:00.0: enabling device (0000 -> 0003) [ 8.669163] r8822be: Using firmware rtlwifi/rtl8822befw.bin [ 9.289939] r8822be: rtlwifi: wireless switch is on [ 10.056426] r8822be 0000:02:00.0 wlp2s0: renamed from wlan0 ... [ 11.952534] r8822be: halmac_init_hal failed [ 11.955933] r8822be: halmac_init_hal failed [ 11.956227] r8822be: halmac_init_hal failed [ 22.007942] r8822be: halmac_init_hal failed Jian-Hong reported it works if turn off ASPM with module parameter aspm=0. In order to fix this problem kindly, this commit don't turn off aspm but enlarge ASPM L1 latency to 7. Reported-by: Jian-Hong Pan Tested-by: Jian-Hong Pan Signed-off-by: Ping-Ke Shih Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtlwifi/rtl8822be/hw.c | 2 +- drivers/staging/rtlwifi/wifi.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtlwifi/rtl8822be/hw.c b/drivers/staging/rtlwifi/rtl8822be/hw.c index 74386003044f..c6db2bd20594 100644 --- a/drivers/staging/rtlwifi/rtl8822be/hw.c +++ b/drivers/staging/rtlwifi/rtl8822be/hw.c @@ -814,7 +814,7 @@ static void _rtl8822be_enable_aspm_back_door(struct ieee80211_hw *hw) return; pci_read_config_byte(rtlpci->pdev, 0x70f, &tmp); - pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | BIT(7)); + pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | ASPM_L1_LATENCY << 3); pci_read_config_byte(rtlpci->pdev, 0x719, &tmp); pci_write_config_byte(rtlpci->pdev, 0x719, tmp | BIT(3) | BIT(4)); diff --git a/drivers/staging/rtlwifi/wifi.h b/drivers/staging/rtlwifi/wifi.h index eb91c130b245..5f0bc363ad41 100644 --- a/drivers/staging/rtlwifi/wifi.h +++ b/drivers/staging/rtlwifi/wifi.h @@ -99,6 +99,7 @@ #define RTL_USB_MAX_RX_COUNT 100 #define QBSS_LOAD_SIZE 5 #define MAX_WMMELE_LENGTH 64 +#define ASPM_L1_LATENCY 7 #define TOTAL_CAM_ENTRY 32 -- GitLab From aae31a169efbd0ee4000c02089597256439bff54 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 26 Jun 2018 20:56:54 +0900 Subject: [PATCH 0127/1291] ata: Fix ZBC_OUT command block check commit b320a0a9f23c98f21631eb27bcbbca91c79b1c6e upstream. The block (LBA) specified must not exceed the last addressable LBA, which is dev->nr_sectors - 1. So fix the correct check is "if (block >= dev->n_sectors)" and not "if (block > dev->n_sectords)". Additionally, the asc/ascq to return for an LBA that is not a zone start LBA should be ILLEGAL REQUEST, regardless if the bad LBA is out of range. Reported-by: David Butterfield Signed-off-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 6b0440a12c51..3f8c3613d6dd 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3801,8 +3801,13 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) */ goto invalid_param_len; } - if (block > dev->n_sectors) - goto out_of_range; + if (block >= dev->n_sectors) { + /* + * Block must be a valid zone ID (a zone start LBA). + */ + fp = 2; + goto invalid_fld; + } all = cdb[14] & 0x1; @@ -3833,10 +3838,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff); return 1; - out_of_range: - /* "Logical Block Address out of range" */ - ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x00); - return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); -- GitLab From 33b9257a64fe16277422062bd6ba2a090e78dbcf Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 26 Jun 2018 20:56:55 +0900 Subject: [PATCH 0128/1291] ata: Fix ZBC_OUT all bit handling commit 6edf1d4cb0acde3a0a5dac849f33031bd7abb7b1 upstream. If the ALL bit is set in the ZBC_OUT command, the command zone ID field (block) should be ignored. Reported-by: David Butterfield Signed-off-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3f8c3613d6dd..bf5777bc04d3 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3801,7 +3801,14 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) */ goto invalid_param_len; } - if (block >= dev->n_sectors) { + + all = cdb[14] & 0x1; + if (all) { + /* + * Ignore the block address (zone ID) as defined by ZBC. + */ + block = 0; + } else if (block >= dev->n_sectors) { /* * Block must be a valid zone ID (a zone start LBA). */ @@ -3809,8 +3816,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) goto invalid_fld; } - all = cdb[14] & 0x1; - if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_out_supported(qc->dev)) { tf->protocol = ATA_PROT_NCQ_NODATA; -- GitLab From 1e39eb1bb1c07b6c46f8d8b712095b182bdfcc87 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jul 2018 19:27:13 -0700 Subject: [PATCH 0129/1291] vmw_balloon: fix inflation with batching commit 90d72ce079791399ac255c75728f3c9e747b093d upstream. Embarrassingly, the recent fix introduced worse problem than it solved, causing the balloon not to inflate. The VM informed the hypervisor that the pages for lock/unlock are sitting in the wrong address, as it used the page that is used the uninitialized page variable. Fixes: b23220fe054e9 ("vmw_balloon: fixing double free when batching mode is off") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index efd733472a35..56c6f79a5c5a 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -467,7 +467,7 @@ static int vmballoon_send_batched_lock(struct vmballoon *b, unsigned int num_pages, bool is_2m_pages, unsigned int *target) { unsigned long status; - unsigned long pfn = page_to_pfn(b->page); + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); STATS_INC(b->stats.lock[is_2m_pages]); @@ -515,7 +515,7 @@ static bool vmballoon_send_batched_unlock(struct vmballoon *b, unsigned int num_pages, bool is_2m_pages, unsigned int *target) { unsigned long status; - unsigned long pfn = page_to_pfn(b->page); + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); STATS_INC(b->stats.unlock[is_2m_pages]); -- GitLab From 1fb3563fac7efaf0f2b8796f9f595b3209a79db9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Jul 2018 12:15:46 +0200 Subject: [PATCH 0130/1291] ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS commit 240630e61870e62e39a97225048f9945848fa5f5 upstream. There have been several reports of LPM related hard freezes about once a day on multiple Lenovo 50 series models. Strange enough these reports where not disk model specific as LPM issues usually are and some users with the exact same disk + laptop where seeing them while other users where not seeing these issues. It turns out that enabling LPM triggers a firmware bug somewhere, which has been fixed in later BIOS versions. This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM for dealing with this. The ahci_broken_lpm() function contains DMI match info for the 4 models which are known to be affected by this and the DMI BIOS date field for known good BIOS versions. If the BIOS date is older then the one in the table LPM will be disabled and a warning will be printed. Note the BIOS dates are for known good versions, some older versions may work too, but we don't know for sure, the table is using dates from BIOS versions for which users have confirmed that upgrading to that version makes the problem go away. Unfortunately I've been unable to get hold of the reporter who reported that BIOS version 2.35 fixed the problems on the W541 for him. I've been able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older dmidecode, but I don't know the exact BIOS date as reported in the DMI. Lenovo keeps a changelog with dates in their release notes, but the dates there are the release dates not the build dates which are in DMI. So I've chosen to set the date to which we compare to one day past the release date of the 2.34 BIOS. I plan to fix this with a follow up commit once I've the necessary info. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 59 +++++++++++++++++++++++++++++++++++++++ drivers/ata/libata-core.c | 3 ++ include/linux/libata.h | 1 + 3 files changed, 63 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 75eb50041c99..f003e301723a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1267,6 +1267,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) return strcmp(buf, dmi->driver_data) < 0; } +static bool ahci_broken_lpm(struct pci_dev *pdev) +{ + static const struct dmi_system_id sysids[] = { + /* Various Lenovo 50 series have LPM issues with older BIOSen */ + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"), + }, + .driver_data = "20180406", /* 1.31 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"), + }, + .driver_data = "20180420", /* 1.28 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"), + }, + .driver_data = "20180315", /* 1.33 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"), + }, + /* + * Note date based on release notes, 2.35 has been + * reported to be good, but I've been unable to get + * a hold of the reporter to get the DMI BIOS date. + * TODO: fix this. + */ + .driver_data = "20180310", /* 2.35 */ + }, + { } /* terminate list */ + }; + const struct dmi_system_id *dmi = dmi_first_match(sysids); + int year, month, date; + char buf[9]; + + if (!dmi) + return false; + + dmi_get_date(DMI_BIOS_DATE, &year, &month, &date); + snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date); + + return strcmp(buf, dmi->driver_data) < 0; +} + static bool ahci_broken_online(struct pci_dev *pdev) { #define ENCODE_BUSDEVFN(bus, slot, func) \ @@ -1677,6 +1730,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "quirky BIOS, skipping spindown on poweroff\n"); } + if (ahci_broken_lpm(pdev)) { + pi.flags |= ATA_FLAG_NO_LPM; + dev_warn(&pdev->dev, + "BIOS update required for Link Power Management support\n"); + } + if (ahci_broken_suspend(pdev)) { hpriv->flags |= AHCI_HFLAG_NO_SUSPEND; dev_warn(&pdev->dev, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cad2530a5b52..6938bd86ff1c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2501,6 +2501,9 @@ int ata_dev_configure(struct ata_device *dev) (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2) dev->horkage |= ATA_HORKAGE_NOLPM; + if (ap->flags & ATA_FLAG_NO_LPM) + dev->horkage |= ATA_HORKAGE_NOLPM; + if (dev->horkage & ATA_HORKAGE_NOLPM) { ata_dev_warn(dev, "LPM support broken, forcing max_power\n"); dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER; diff --git a/include/linux/libata.h b/include/linux/libata.h index 931c32f1f18d..c5188dc389c8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -211,6 +211,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LPM = (1 << 2), /* host not happy with LPM */ ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ -- GitLab From 7ce4add980f54c2ef1fe4eca4136493200292793 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:29:38 +0300 Subject: [PATCH 0131/1291] USB: serial: ch341: fix type promotion bug in ch341_control_in() commit e33eab9ded328ccc14308afa51b5be7cbe78d30b upstream. The "r" variable is an int and "bufsize" is an unsigned int so the comparison is type promoted to unsigned. If usb_control_msg() returns a negative that is treated as a high positive value and the error handling doesn't work. Fixes: 2d5a9c72d0c4 ("USB: serial: ch341: fix control-message error handling") Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 351745aec0e1..578596d301b8 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -131,7 +131,7 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - if (r < bufsize) { + if (r < (int)bufsize) { if (r >= 0) { dev_err(&dev->dev, "short control message received (%d < %u)\n", -- GitLab From 7aa69d8f50be9fa1cf4ebe72d32b79a19f4dfb34 Mon Sep 17 00:00:00 2001 From: Olli Salonen Date: Wed, 4 Jul 2018 14:07:42 +0300 Subject: [PATCH 0132/1291] USB: serial: cp210x: add another USB ID for Qivicon ZigBee stick commit 367b160fe4717c14a2a978b6f9ffb75a7762d3ed upstream. There are two versions of the Qivicon Zigbee stick in circulation. This adds the second USB ID to the cp210x driver. Signed-off-by: Olli Salonen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 142a83e5974c..c931ae689a91 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -152,6 +152,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ + { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ -- GitLab From f24b02c3a0818651f179bdf1026a8de67a97070f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:16 +0200 Subject: [PATCH 0133/1291] USB: serial: keyspan_pda: fix modem-status error handling commit 01b3cdfca263a17554f7b249d20a247b2a751521 upstream. Fix broken modem-status error handling which could lead to bits of slab data leaking to user space. Fixes: 3b36a8fd6777 ("usb: fix uninitialized variable warning in keyspan_pda") Cc: stable # 2.6.27 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/keyspan_pda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index 196908dd25a1..f8e8285663a6 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -373,8 +373,10 @@ static int keyspan_pda_get_modem_info(struct usb_serial *serial, 3, /* get pins */ USB_TYPE_VENDOR|USB_RECIP_INTERFACE|USB_DIR_IN, 0, 0, data, 1, 2000); - if (rc >= 0) + if (rc == 1) *value = *data; + else if (rc >= 0) + rc = -EIO; kfree(data); return rc; -- GitLab From 90f2a76ccd37cce2530df49335bcea6cd0e23797 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 17:12:56 +0200 Subject: [PATCH 0134/1291] USB: yurex: fix out-of-bounds uaccess in read handler commit f1e255d60ae66a9f672ff9a207ee6cd8e33d2679 upstream. In general, accessing userspace memory beyond the length of the supplied buffer in VFS read/write handlers can lead to both kernel memory corruption (via kernel_read()/kernel_write(), which can e.g. be triggered via sys_splice()) and privilege escalation inside userspace. Fix it by using simple_read_from_buffer() instead of custom logic. Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX") Signed-off-by: Jann Horn Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 58abdf28620a..47763311a42e 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -400,8 +400,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; - int retval = 0; - int bytes_read = 0; + int len = 0; char in_buffer[20]; unsigned long flags; @@ -409,26 +408,16 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, mutex_lock(&dev->io_mutex); if (!dev->interface) { /* already disconnected */ - retval = -ENODEV; - goto exit; + mutex_unlock(&dev->io_mutex); + return -ENODEV; } spin_lock_irqsave(&dev->lock, flags); - bytes_read = snprintf(in_buffer, 20, "%lld\n", dev->bbu); + len = snprintf(in_buffer, 20, "%lld\n", dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); - - if (*ppos < bytes_read) { - if (copy_to_user(buffer, in_buffer + *ppos, bytes_read - *ppos)) - retval = -EFAULT; - else { - retval = bytes_read - *ppos; - *ppos += bytes_read; - } - } - -exit: mutex_unlock(&dev->io_mutex); - return retval; + + return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } static ssize_t yurex_write(struct file *file, const char __user *user_buffer, -- GitLab From 82b9cb4d3f062aa80f15a4f2a2f567b93e2d0739 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:17 +0200 Subject: [PATCH 0135/1291] USB: serial: mos7840: fix status-register error handling commit 794744abfffef8b1f3c0c8a4896177d6d13d653d upstream. Add missing transfer-length sanity check to the status-register completion handler to avoid leaking bits of uninitialised slab data to user space. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Cc: stable # 2.6.19 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index e8669aae14b3..5e490177cf75 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -481,6 +481,9 @@ static void mos7840_control_callback(struct urb *urb) } dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length); + if (urb->actual_length < 1) + goto out; + dev_dbg(dev, "%s mos7840_port->MsrLsr is %d port %d\n", __func__, mos7840_port->MsrLsr, mos7840_port->port_num); data = urb->transfer_buffer; -- GitLab From 55f51e5b4cffc6d98b945868281c67dbf0368c25 Mon Sep 17 00:00:00 2001 From: Nico Sneck Date: Mon, 2 Jul 2018 19:26:07 +0300 Subject: [PATCH 0136/1291] usb: quirks: add delay quirks for Corsair Strafe commit bba57eddadda936c94b5dccf73787cb9e159d0a5 upstream. Corsair Strafe appears to suffer from the same issues as the Corsair Strafe RGB. Apply the same quirks (control message delay and init delay) that the RGB version has to 1b1c:1b15. With these quirks in place the keyboard works correctly upon booting the system, and no longer requires reattaching the device. Signed-off-by: Nico Sneck Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 40ce175655e6..99f67764765f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -231,6 +231,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair K70 RGB */ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair Strafe */ + { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, + /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, -- GitLab From 7499390b8ba5e4c9d378d301f7fd7db9be28f922 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:48:53 +0300 Subject: [PATCH 0137/1291] xhci: xhci-mem: off by one in xhci_stream_id_to_ring() commit 313db3d6488bb03b61b99de9dbca061f1fd838e1 upstream. The > should be >= here so that we don't read one element beyond the end of the ep->stream_info->stream_rings[] array. Fixes: e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.") Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 00b710016d21..b7b55eb82714 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -604,7 +604,7 @@ struct xhci_ring *xhci_stream_id_to_ring( if (!ep->stream_info) return NULL; - if (stream_id > ep->stream_info->num_streams) + if (stream_id >= ep->stream_info->num_streams) return NULL; return ep->stream_info->stream_rings[stream_id]; } -- GitLab From cd360be6488cf3e455ca6be57fff6f6fcc787b15 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 13 Mar 2018 17:55:24 +0100 Subject: [PATCH 0138/1291] devpts: hoist out check for DEVPTS_SUPER_MAGIC commit 7d71109df186d630a41280670c8d71d0cf9b0da9 upstream. Hoist the check whether we have already found a suitable devpts filesystem out of devpts_ptmx_path() in preparation for the devpts bind-mount resolution patch. This is a non-functional change. Signed-off-by: Christian Brauner Reviewed-by: "Eric W. Biederman" Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/devpts/inode.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index e31d6ed3ec32..71b901936113 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -138,10 +138,6 @@ static int devpts_ptmx_path(struct path *path) struct super_block *sb; int err; - /* Has the devpts filesystem already been found? */ - if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC) - return 0; - /* Is a devpts filesystem at "pts" in the same directory? */ err = path_pts(path); if (err) @@ -159,21 +155,25 @@ static int devpts_ptmx_path(struct path *path) struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) { struct path path; - int err; + int err = 0; path = filp->f_path; path_get(&path); - err = devpts_ptmx_path(&path); + /* Has the devpts filesystem already been found? */ + if (path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC) + err = devpts_ptmx_path(&path); dput(path.dentry); if (err) { mntput(path.mnt); return ERR_PTR(err); } + if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { mntput(path.mnt); return ERR_PTR(-ENODEV); } + return path.mnt; } @@ -182,15 +182,19 @@ struct pts_fs_info *devpts_acquire(struct file *filp) struct pts_fs_info *result; struct path path; struct super_block *sb; - int err; path = filp->f_path; path_get(&path); - err = devpts_ptmx_path(&path); - if (err) { - result = ERR_PTR(err); - goto out; + /* Has the devpts filesystem already been found? */ + if (path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC) { + int err; + + err = devpts_ptmx_path(&path); + if (err) { + result = ERR_PTR(err); + goto out; + } } /* -- GitLab From a6d26649fda0274b54fde2b84e61c09dca73e934 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 13 Mar 2018 17:55:25 +0100 Subject: [PATCH 0139/1291] devpts: resolve devpts bind-mounts commit a319b01d9095da6f6c54bd20c1f1300762506255 upstream. Most libcs will still look at /dev/ptmx when opening the master fd of a pty device. When /dev/ptmx is a bind-mount of /dev/pts/ptmx and the TIOCGPTPEER ioctl() is used to safely retrieve a file descriptor for the slave side of the pty based on the master fd, the /proc/self/fd/{0,1,2} symlinks will point to /. A very simply reproducer for this issue presupposing a libc that uses TIOCGPTPEER in its openpty() implementation is: unshare --mount mount --bind /dev/pts/ptmx /dev/ptmx chmod 666 /dev/ptmx script ls -al /proc/self/fd/0 Having bind-mounts of /dev/pts/ptmx to /dev/ptmx not working correctly is a regression. In addition, it is also a fairly common scenario in containers employing user namespaces. The reason for the current failure is that the kernel tries to verify the useability of the devpts filesystem without resolving the /dev/ptmx bind-mount first. This will lead it to detect that the dentry is escaping its bind-mount. The reason is that while the devpts filesystem mounted at /dev/pts has the devtmpfs mounted at /dev as its parent mount: 21 -- -- / /dev -- 21 -- / /dev/pts devtmpfs and devpts are on different devices -- -- 0:6 / /dev -- -- 0:20 / /dev/pts This has the consequence that the pathname of the parent directory of the devpts filesystem mount at /dev/pts is /. So if /dev/ptmx is a bind-mount of /dev/pts/ptmx then the /dev/ptmx bind-mount and the devpts mount at /dev/pts will end up being located on the same device which is recorded in the superblock of their vfsmount. This means the parent directory of the /dev/ptmx bind-mount will be /ptmx: -- -- ---- /ptmx /dev/ptmx Without the bind-mount resolution patch the kernel will now perform the bind-mount escape check directly on /dev/ptmx. The function responsible for this is devpts_ptmx_path() which calls pts_path() which in turn calls path_parent_directory(). Based on the above explanation, path_parent_directory() will yield / as the parent directory for the /dev/ptmx bind-mount and not the expected /dev. Thus, the kernel detects that /dev/ptmx is escaping its bind-mount and will set /proc//fd/ to /. This patch changes the logic to first resolve any bind-mounts. After the bind-mounts have been resolved (i.e. we have traced it back to the associated devpts mount) devpts_ptmx_path() can be called. In order to guarantee correct path generation for the slave file descriptor the kernel now requires that a pts directory is found in the parent directory of the ptmx bind-mount. This implies that when doing bind-mounts the ptmx bind-mount and the devpts mount should have a common parent directory. A valid example is: mount -t devpts devpts /dev/pts mount --bind /dev/pts/ptmx /dev/ptmx an invalid example is: mount -t devpts devpts /dev/pts mount --bind /dev/pts/ptmx /ptmx This allows us to support: - calling open on ptmx devices located inside non-standard devpts mounts: mount -t devpts devpts /mnt master = open("/mnt/ptmx", ...); slave = ioctl(master, TIOCGPTPEER, ...); - calling open on ptmx devices located outside the devpts mount with a common ancestor directory: mount -t devpts devpts /dev/pts mount --bind /dev/pts/ptmx /dev/ptmx master = open("/dev/ptmx", ...); slave = ioctl(master, TIOCGPTPEER, ...); while failing on ptmx devices located outside the devpts mount without a common ancestor directory: mount -t devpts devpts /dev/pts mount --bind /dev/pts/ptmx /ptmx master = open("/ptmx", ...); slave = ioctl(master, TIOCGPTPEER, ...); in which case save path generation cannot be guaranteed. Signed-off-by: Christian Brauner Suggested-by: Eric Biederman Suggested-by: Linus Torvalds Reviewed-by: "Eric W. Biederman" Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- fs/devpts/inode.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 71b901936113..542364bf923e 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -160,21 +160,27 @@ struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) path = filp->f_path; path_get(&path); - /* Has the devpts filesystem already been found? */ - if (path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC) + /* Walk upward while the start point is a bind mount of + * a single file. + */ + while (path.mnt->mnt_root == path.dentry) + if (follow_up(&path) == 0) + break; + + /* devpts_ptmx_path() finds a devpts fs or returns an error. */ + if ((path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC) || + (DEVPTS_SB(path.mnt->mnt_sb) != fsi)) err = devpts_ptmx_path(&path); dput(path.dentry); - if (err) { - mntput(path.mnt); - return ERR_PTR(err); - } + if (!err) { + if (DEVPTS_SB(path.mnt->mnt_sb) == fsi) + return path.mnt; - if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { - mntput(path.mnt); - return ERR_PTR(-ENODEV); + err = -ENODEV; } - return path.mnt; + mntput(path.mnt); + return ERR_PTR(err); } struct pts_fs_info *devpts_acquire(struct file *filp) -- GitLab From 298243a5fb640f018e2fae22c0c895f1b27f0963 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 3 Jul 2018 17:10:19 -0700 Subject: [PATCH 0140/1291] Fix up non-directory creation in SGID directories commit 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7 upstream. sgid directories have special semantics, making newly created files in the directory belong to the group of the directory, and newly created subdirectories will also become sgid. This is historically used for group-shared directories. But group directories writable by non-group members should not imply that such non-group members can magically join the group, so make sure to clear the sgid bit on non-directories for non-members (but remember that sgid without group execute means "mandatory locking", just to confuse things even more). Reported-by: Jann Horn Cc: Andy Lutomirski Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/inode.c b/fs/inode.c index e07b3e1f5970..cfc36d11bcb3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2006,8 +2006,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_uid = current_fsuid(); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; + + /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(dir, CAP_FSETID)) + mode &= ~S_ISGID; } else inode->i_gid = current_fsgid(); inode->i_mode = mode; -- GitLab From ef86f3a72adb8a7931f67335560740a7ad696d1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Jan 2018 10:53:05 +0800 Subject: [PATCH 0141/1291] genirq/affinity: assign vectors to all possible CPUs commit 84676c1f21e8ff54befe985f4f14dc1edc10046b upstream. Currently we assign managed interrupt vectors to all present CPUs. This works fine for systems were we only online/offline CPUs. But in case of systems that support physical CPU hotplug (or the virtualized version of it) this means the additional CPUs covered for in the ACPI tables or on the command line are not catered for. To fix this we'd either need to introduce new hotplug CPU states just for this case, or we can start assining vectors to possible but not present CPUs. Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Tested-by: Stefan Haberland Fixes: 4b855ad37194 ("blk-mq: Create hctx for each present CPU") Cc: linux-kernel@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- kernel/irq/affinity.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index e12d35108225..a37a3b4b6342 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -39,7 +39,7 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, } } -static cpumask_var_t *alloc_node_to_present_cpumask(void) +static cpumask_var_t *alloc_node_to_possible_cpumask(void) { cpumask_var_t *masks; int node; @@ -62,7 +62,7 @@ static cpumask_var_t *alloc_node_to_present_cpumask(void) return NULL; } -static void free_node_to_present_cpumask(cpumask_var_t *masks) +static void free_node_to_possible_cpumask(cpumask_var_t *masks) { int node; @@ -71,22 +71,22 @@ static void free_node_to_present_cpumask(cpumask_var_t *masks) kfree(masks); } -static void build_node_to_present_cpumask(cpumask_var_t *masks) +static void build_node_to_possible_cpumask(cpumask_var_t *masks) { int cpu; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); } -static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask, +static int get_nodes_in_cpumask(cpumask_var_t *node_to_possible_cpumask, const struct cpumask *mask, nodemask_t *nodemsk) { int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ for_each_node(n) { - if (cpumask_intersects(mask, node_to_present_cpumask[n])) { + if (cpumask_intersects(mask, node_to_possible_cpumask[n])) { node_set(n, *nodemsk); nodes++; } @@ -109,7 +109,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int last_affv = affv + affd->pre_vectors; nodemask_t nodemsk = NODE_MASK_NONE; struct cpumask *masks; - cpumask_var_t nmsk, *node_to_present_cpumask; + cpumask_var_t nmsk, *node_to_possible_cpumask; /* * If there aren't any vectors left after applying the pre/post @@ -125,8 +125,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (!masks) goto out; - node_to_present_cpumask = alloc_node_to_present_cpumask(); - if (!node_to_present_cpumask) + node_to_possible_cpumask = alloc_node_to_possible_cpumask(); + if (!node_to_possible_cpumask) goto out; /* Fill out vectors at the beginning that don't need affinity */ @@ -135,8 +135,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) /* Stabilize the cpumasks */ get_online_cpus(); - build_node_to_present_cpumask(node_to_present_cpumask); - nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask, + build_node_to_possible_cpumask(node_to_possible_cpumask); + nodes = get_nodes_in_cpumask(node_to_possible_cpumask, cpu_possible_mask, &nodemsk); /* @@ -146,7 +146,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (affv <= nodes) { for_each_node_mask(n, nodemsk) { cpumask_copy(masks + curvec, - node_to_present_cpumask[n]); + node_to_possible_cpumask[n]); if (++curvec == last_affv) break; } @@ -160,7 +160,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]); + cpumask_and(nmsk, cpu_possible_mask, node_to_possible_cpumask[n]); /* Calculate the number of cpus per vector */ ncpus = cpumask_weight(nmsk); @@ -192,7 +192,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) /* Fill out vectors at the end that don't need affinity */ for (; curvec < nvecs; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); - free_node_to_present_cpumask(node_to_present_cpumask); + free_node_to_possible_cpumask(node_to_possible_cpumask); out: free_cpumask_var(nmsk); return masks; @@ -214,7 +214,7 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity return 0; get_online_cpus(); - ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv; + ret = min_t(int, cpumask_weight(cpu_possible_mask), vecs) + resv; put_online_cpus(); return ret; } -- GitLab From d7e6dcdaa3ea5f154d8924af26d12881a6380c71 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Thu, 19 Oct 2017 02:48:48 -0700 Subject: [PATCH 0142/1291] scsi: megaraid_sas: use adapter_type for all gen controllers commit c365178f3147f38d26c15bdf43a363bacb5406ec upstream. No functional change. Refactor adapter_type to set for all generation controllers, not just for fusion controllers. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas.h | 8 ++ drivers/scsi/megaraid/megaraid_sas_base.c | 88 +++++++++++++-------- drivers/scsi/megaraid/megaraid_sas_fp.c | 10 +-- drivers/scsi/megaraid/megaraid_sas_fusion.c | 18 ++--- drivers/scsi/megaraid/megaraid_sas_fusion.h | 7 -- 5 files changed, 76 insertions(+), 55 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index a6722c93a295..90b9b5d7f0f8 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -1504,6 +1504,13 @@ enum FW_BOOT_CONTEXT { #define MR_CAN_HANDLE_SYNC_CACHE_OFFSET 0X01000000 +enum MR_ADAPTER_TYPE { + MFI_SERIES = 1, + THUNDERBOLT_SERIES = 2, + INVADER_SERIES = 3, + VENTURA_SERIES = 4, +}; + /* * register set for both 1068 and 1078 controllers * structure extended for 1078 registers @@ -2242,6 +2249,7 @@ struct megasas_instance { /* preffered count to send as LDIO irrspective of FP capable.*/ u8 r1_ldio_hint_default; u32 nvme_page_size; + u8 adapter_type; }; struct MR_LD_VF_MAP { u32 size; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 4beb4dd2bee8..e90a9e846ac2 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5229,7 +5229,8 @@ static int megasas_init_fw(struct megasas_instance *instance) (&instance->reg_set->outbound_scratch_pad_2); /* Check max MSI-X vectors */ if (fusion) { - if (fusion->adapter_type == THUNDERBOLT_SERIES) { /* Thunderbolt Series*/ + if (instance->adapter_type == THUNDERBOLT_SERIES) { + /* Thunderbolt Series*/ instance->msix_vectors = (scratch_pad_2 & MR_MAX_REPLY_QUEUES_OFFSET) + 1; fw_msix_count = instance->msix_vectors; @@ -5965,6 +5966,46 @@ megasas_set_dma_mask(struct pci_dev *pdev) return 1; } +/* + * megasas_set_adapter_type - Set adapter type. + * Supported controllers can be divided in + * 4 categories- enum MR_ADAPTER_TYPE { + * MFI_SERIES = 1, + * THUNDERBOLT_SERIES = 2, + * INVADER_SERIES = 3, + * VENTURA_SERIES = 4, + * }; + * @instance: Adapter soft state + * return: void + */ +static inline void megasas_set_adapter_type(struct megasas_instance *instance) +{ + switch (instance->pdev->device) { + case PCI_DEVICE_ID_LSI_VENTURA: + case PCI_DEVICE_ID_LSI_HARPOON: + case PCI_DEVICE_ID_LSI_TOMCAT: + case PCI_DEVICE_ID_LSI_VENTURA_4PORT: + case PCI_DEVICE_ID_LSI_CRUSADER_4PORT: + instance->adapter_type = VENTURA_SERIES; + break; + case PCI_DEVICE_ID_LSI_FUSION: + case PCI_DEVICE_ID_LSI_PLASMA: + instance->adapter_type = THUNDERBOLT_SERIES; + break; + case PCI_DEVICE_ID_LSI_INVADER: + case PCI_DEVICE_ID_LSI_INTRUDER: + case PCI_DEVICE_ID_LSI_INTRUDER_24: + case PCI_DEVICE_ID_LSI_CUTLASS_52: + case PCI_DEVICE_ID_LSI_CUTLASS_53: + case PCI_DEVICE_ID_LSI_FURY: + instance->adapter_type = INVADER_SERIES; + break; + default: /* For all other supported controllers */ + instance->adapter_type = MFI_SERIES; + break; + } +} + /** * megasas_probe_one - PCI hotplug entry point * @pdev: PCI device structure @@ -5977,7 +6018,6 @@ static int megasas_probe_one(struct pci_dev *pdev, struct Scsi_Host *host; struct megasas_instance *instance; u16 control = 0; - struct fusion_context *fusion = NULL; /* Reset MSI-X in the kdump kernel */ if (reset_devices) { @@ -6022,39 +6062,10 @@ static int megasas_probe_one(struct pci_dev *pdev, atomic_set(&instance->fw_reset_no_pci_access, 0); instance->pdev = pdev; - switch (instance->pdev->device) { - case PCI_DEVICE_ID_LSI_VENTURA: - case PCI_DEVICE_ID_LSI_HARPOON: - case PCI_DEVICE_ID_LSI_TOMCAT: - case PCI_DEVICE_ID_LSI_VENTURA_4PORT: - case PCI_DEVICE_ID_LSI_CRUSADER_4PORT: - instance->is_ventura = true; - case PCI_DEVICE_ID_LSI_FUSION: - case PCI_DEVICE_ID_LSI_PLASMA: - case PCI_DEVICE_ID_LSI_INVADER: - case PCI_DEVICE_ID_LSI_FURY: - case PCI_DEVICE_ID_LSI_INTRUDER: - case PCI_DEVICE_ID_LSI_INTRUDER_24: - case PCI_DEVICE_ID_LSI_CUTLASS_52: - case PCI_DEVICE_ID_LSI_CUTLASS_53: - { - if (megasas_alloc_fusion_context(instance)) { - megasas_free_fusion_context(instance); - goto fail_alloc_dma_buf; - } - fusion = instance->ctrl_context; - - if ((instance->pdev->device == PCI_DEVICE_ID_LSI_FUSION) || - (instance->pdev->device == PCI_DEVICE_ID_LSI_PLASMA)) - fusion->adapter_type = THUNDERBOLT_SERIES; - else if (instance->is_ventura) - fusion->adapter_type = VENTURA_SERIES; - else - fusion->adapter_type = INVADER_SERIES; - } - break; - default: /* For all other supported controllers */ + megasas_set_adapter_type(instance); + switch (instance->adapter_type) { + case MFI_SERIES: instance->producer = pci_alloc_consistent(pdev, sizeof(u32), &instance->producer_h); @@ -6070,7 +6081,16 @@ static int megasas_probe_one(struct pci_dev *pdev, *instance->producer = 0; *instance->consumer = 0; + break; + case VENTURA_SERIES: + instance->is_ventura = 1; + case THUNDERBOLT_SERIES: + case INVADER_SERIES: + if (megasas_alloc_fusion_context(instance)) { + megasas_free_fusion_context(instance); + goto fail_alloc_dma_buf; + } } /* Crash dump feature related initialisation*/ diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 08945142b9f8..5e18f25bd53e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -755,8 +755,8 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, } } else { if ((raid->level >= 5) && - ((fusion->adapter_type == THUNDERBOLT_SERIES) || - ((fusion->adapter_type == INVADER_SERIES) && + ((instance->adapter_type == THUNDERBOLT_SERIES) || + ((instance->adapter_type == INVADER_SERIES) && (raid->regTypeReqOnRead != REGION_TYPE_UNUSED)))) pRAID_Context->reg_lock_flags = REGION_TYPE_EXCLUSIVE; else if (raid->level == 1) { @@ -871,8 +871,8 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, } } else { if ((raid->level >= 5) && - ((fusion->adapter_type == THUNDERBOLT_SERIES) || - ((fusion->adapter_type == INVADER_SERIES) && + ((instance->adapter_type == THUNDERBOLT_SERIES) || + ((instance->adapter_type == INVADER_SERIES) && (raid->regTypeReqOnRead != REGION_TYPE_UNUSED)))) pRAID_Context->reg_lock_flags = REGION_TYPE_EXCLUSIVE; else if (raid->level == 1) { @@ -1096,7 +1096,7 @@ MR_BuildRaidContext(struct megasas_instance *instance, cpu_to_le16(raid->fpIoTimeoutForLd ? raid->fpIoTimeoutForLd : map->raidMap.fpPdIoTimeoutSec); - if (fusion->adapter_type == INVADER_SERIES) + if (instance->adapter_type == INVADER_SERIES) pRAID_Context->reg_lock_flags = (isRead) ? raid->regTypeReqOnRead : raid->regTypeReqOnWrite; else if (!instance->is_ventura) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 72a919179d06..ca7c235c506a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -838,7 +838,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) drv_ops = (MFI_CAPABILITIES *) &(init_frame->driver_operations); /* driver support Extended MSIX */ - if (fusion->adapter_type >= INVADER_SERIES) + if (instance->adapter_type >= INVADER_SERIES) drv_ops->mfi_capabilities.support_additional_msix = 1; /* driver supports HA / Remote LUN over Fast Path interface */ drv_ops->mfi_capabilities.support_fp_remote_lun = 1; @@ -1789,7 +1789,7 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, fusion = instance->ctrl_context; - if (fusion->adapter_type >= INVADER_SERIES) { + if (instance->adapter_type >= INVADER_SERIES) { struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr_end = sgl_ptr; sgl_ptr_end += fusion->max_sge_in_main_msg - 1; sgl_ptr_end->Flags = 0; @@ -1799,7 +1799,7 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, sgl_ptr->Length = cpu_to_le32(sg_dma_len(os_sgl)); sgl_ptr->Address = cpu_to_le64(sg_dma_address(os_sgl)); sgl_ptr->Flags = 0; - if (fusion->adapter_type >= INVADER_SERIES) + if (instance->adapter_type >= INVADER_SERIES) if (i == sge_count - 1) sgl_ptr->Flags = IEEE_SGE_FLAGS_END_OF_LIST; sgl_ptr++; @@ -1809,7 +1809,7 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, (sge_count > fusion->max_sge_in_main_msg)) { struct MPI25_IEEE_SGE_CHAIN64 *sg_chain; - if (fusion->adapter_type >= INVADER_SERIES) { + if (instance->adapter_type >= INVADER_SERIES) { if ((le16_to_cpu(cmd->io_request->IoFlags) & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) != MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) @@ -1825,7 +1825,7 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, sg_chain = sgl_ptr; /* Prepare chain element */ sg_chain->NextChainOffset = 0; - if (fusion->adapter_type >= INVADER_SERIES) + if (instance->adapter_type >= INVADER_SERIES) sg_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT; else sg_chain->Flags = @@ -2402,7 +2402,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); - if (fusion->adapter_type == INVADER_SERIES) { + if (instance->adapter_type == INVADER_SERIES) { if (io_request->RaidContext.raid_context.reg_lock_flags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = @@ -2467,7 +2467,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, cmd->request_desc->SCSIIO.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_LD_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); - if (fusion->adapter_type == INVADER_SERIES) { + if (instance->adapter_type == INVADER_SERIES) { if (io_info.do_fp_rlbypass || (io_request->RaidContext.raid_context.reg_lock_flags == REGION_TYPE_UNUSED)) @@ -2688,7 +2688,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeout_value = cpu_to_le16((os_timeout_value > timeout_limit) ? timeout_limit : os_timeout_value); - if (fusion->adapter_type >= INVADER_SERIES) + if (instance->adapter_type >= INVADER_SERIES) io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); @@ -3301,7 +3301,7 @@ build_mpt_mfi_pass_thru(struct megasas_instance *instance, io_req = cmd->io_request; - if (fusion->adapter_type >= INVADER_SERIES) { + if (instance->adapter_type >= INVADER_SERIES) { struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr_end = (struct MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL; sgl_ptr_end += fusion->max_sge_in_main_msg - 1; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index d78d76112501..7c1f7ccf031d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -104,12 +104,6 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { #define RAID_1_PEER_CMDS 2 #define JBOD_MAPS_COUNT 2 -enum MR_FUSION_ADAPTER_TYPE { - THUNDERBOLT_SERIES = 0, - INVADER_SERIES = 1, - VENTURA_SERIES = 2, -}; - /* * Raid Context structure which describes MegaRAID specific IO Parameters * This resides at offset 0x60 where the SGL normally starts in MPT IO Frames @@ -1319,7 +1313,6 @@ struct fusion_context { struct LD_LOAD_BALANCE_INFO *load_balance_info; u32 load_balance_info_pages; LD_SPAN_INFO log_to_span[MAX_LOGICAL_DRIVES_EXT]; - u8 adapter_type; struct LD_STREAM_DETECT **stream_detect_by_ld; }; -- GitLab From 90229163fa6cff513387e5e2a422a6193391a3d6 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Thu, 19 Oct 2017 02:48:50 -0700 Subject: [PATCH 0143/1291] scsi: megaraid_sas: replace instance->ctrl_context checks with instance->adapter_type commit e7d36b88435077847e1ea992919c600f3fa9321c upstream. Increase code readability. No functional change. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 64 +++++++++++++---------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e90a9e846ac2..c2530f778c38 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2023,7 +2023,7 @@ void megaraid_sas_kill_hba(struct megasas_instance *instance) msleep(1000); if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0073SKINNY) || (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) || - (instance->ctrl_context)) { + (instance->adapter_type != MFI_SERIES)) { writel(MFI_STOP_ADP, &instance->reg_set->doorbell); /* Flush */ readl(&instance->reg_set->doorbell); @@ -2494,7 +2494,8 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance, dev_warn(&instance->pdev->dev, "SR-IOV: Starting heartbeat for scsi%d\n", instance->host->host_no); - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) retval = megasas_issue_blocked_cmd(instance, cmd, MEGASAS_ROUTINE_WAIT_TIME_VF); else @@ -2790,7 +2791,9 @@ static int megasas_reset_bus_host(struct scsi_cmnd *scmd) /* * First wait for all commands to complete */ - if (instance->ctrl_context) { + if (instance->adapter_type == MFI_SERIES) { + ret = megasas_generic_reset(scmd); + } else { struct megasas_cmd_fusion *cmd; cmd = (struct megasas_cmd_fusion *)scmd->SCp.ptr; if (cmd) @@ -2798,8 +2801,7 @@ static int megasas_reset_bus_host(struct scsi_cmnd *scmd) MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE); ret = megasas_reset_fusion(scmd->device->host, SCSIIO_TIMEOUT_OCR); - } else - ret = megasas_generic_reset(scmd); + } return ret; } @@ -2816,7 +2818,7 @@ static int megasas_task_abort(struct scsi_cmnd *scmd) instance = (struct megasas_instance *)scmd->device->host->hostdata; - if (instance->ctrl_context) + if (instance->adapter_type != MFI_SERIES) ret = megasas_task_abort_fusion(scmd); else { sdev_printk(KERN_NOTICE, scmd->device, "TASK ABORT not supported\n"); @@ -2838,7 +2840,7 @@ static int megasas_reset_target(struct scsi_cmnd *scmd) instance = (struct megasas_instance *)scmd->device->host->hostdata; - if (instance->ctrl_context) + if (instance->adapter_type != MFI_SERIES) ret = megasas_reset_target_fusion(scmd); else { sdev_printk(KERN_NOTICE, scmd->device, "TARGET RESET not supported\n"); @@ -3715,7 +3717,7 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) PCI_DEVICE_ID_LSI_SAS0073SKINNY) || (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) || - (instance->ctrl_context)) + (instance->adapter_type != MFI_SERIES)) writel( MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG, &instance->reg_set->doorbell); @@ -3733,7 +3735,7 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) PCI_DEVICE_ID_LSI_SAS0073SKINNY) || (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) || - (instance->ctrl_context)) + (instance->adapter_type != MFI_SERIES)) writel(MFI_INIT_HOTPLUG, &instance->reg_set->doorbell); else @@ -3753,11 +3755,11 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) PCI_DEVICE_ID_LSI_SAS0073SKINNY) || (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) || - (instance->ctrl_context)) { + (instance->adapter_type != MFI_SERIES)) { writel(MFI_RESET_FLAGS, &instance->reg_set->doorbell); - if (instance->ctrl_context) { + if (instance->adapter_type != MFI_SERIES) { for (i = 0; i < (10 * 1000); i += 20) { if (readl( &instance-> @@ -3924,7 +3926,8 @@ static int megasas_create_frame_pool(struct megasas_instance *instance) * max_sge_sz = 12 byte (sizeof megasas_sge64) * Total 192 byte (3 MFI frame of 64 byte) */ - frame_count = instance->ctrl_context ? (3 + 1) : (15 + 1); + frame_count = (instance->adapter_type == MFI_SERIES) ? + (15 + 1) : (3 + 1); instance->mfi_frame_size = MEGAMFI_FRAME_SIZE * frame_count; /* * Use DMA pool facility provided by PCI layer @@ -3979,7 +3982,7 @@ static int megasas_create_frame_pool(struct megasas_instance *instance) memset(cmd->frame, 0, instance->mfi_frame_size); cmd->frame->io.context = cpu_to_le32(cmd->index); cmd->frame->io.pad_0 = 0; - if (!instance->ctrl_context && reset_devices) + if ((instance->adapter_type == MFI_SERIES) && reset_devices) cmd->frame->hdr.cmd = MFI_CMD_INVALID; } @@ -4099,7 +4102,7 @@ int megasas_alloc_cmds(struct megasas_instance *instance) inline int dcmd_timeout_ocr_possible(struct megasas_instance *instance) { - if (!instance->ctrl_context) + if (instance->adapter_type == MFI_SERIES) return KILL_ADAPTER; else if (instance->unload || test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) @@ -4143,7 +4146,8 @@ megasas_get_pd_info(struct megasas_instance *instance, struct scsi_device *sdev) dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->pd_info_h); dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_PD_INFO)); - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else ret = megasas_issue_polled(instance, cmd); @@ -4240,7 +4244,8 @@ megasas_get_pd_list(struct megasas_instance *instance) dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); dcmd->sgl.sge32[0].length = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST)); - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else @@ -4251,7 +4256,7 @@ megasas_get_pd_list(struct megasas_instance *instance) dev_info(&instance->pdev->dev, "MR_DCMD_PD_LIST_QUERY " "failed/not supported by firmware\n"); - if (instance->ctrl_context) + if (instance->adapter_type != MFI_SERIES) megaraid_sas_kill_hba(instance); else instance->pd_list_not_supported = 1; @@ -4372,7 +4377,8 @@ megasas_get_ld_list(struct megasas_instance *instance) dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_LIST)); dcmd->pad_0 = 0; - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else @@ -4491,7 +4497,8 @@ megasas_ld_list_query(struct megasas_instance *instance, u8 query_type) dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST)); dcmd->pad_0 = 0; - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else ret = megasas_issue_polled(instance, cmd); @@ -4664,7 +4671,8 @@ megasas_get_ctrl_info(struct megasas_instance *instance) dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_ctrl_info)); dcmd->mbox.b[0] = 1; - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else ret = megasas_issue_polled(instance, cmd); @@ -4783,7 +4791,8 @@ int megasas_set_crash_dump_params(struct megasas_instance *instance, dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->crash_dump_h); dcmd->sgl.sge32[0].length = cpu_to_le32(CRASH_DMA_BUF_SIZE); - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else ret = megasas_issue_polled(instance, cmd); @@ -5170,7 +5179,7 @@ static int megasas_init_fw(struct megasas_instance *instance) reg_set = instance->reg_set; - if (fusion) + if (instance->adapter_type != MFI_SERIES) instance->instancet = &megasas_instance_template_fusion; else { switch (instance->pdev->device) { @@ -5805,7 +5814,8 @@ megasas_get_target_prop(struct megasas_instance *instance, dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_TARGET_PROPERTIES)); - if (instance->ctrl_context && !instance->mask_interrupts) + if ((instance->adapter_type != MFI_SERIES) && + !instance->mask_interrupts) ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS); else @@ -6186,7 +6196,7 @@ static int megasas_probe_one(struct pci_dev *pdev, instance->disableOnlineCtrlReset = 1; instance->UnevenSpanSupport = 0; - if (instance->ctrl_context) { + if (instance->adapter_type != MFI_SERIES) { INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq); } else @@ -6266,7 +6276,7 @@ static int megasas_probe_one(struct pci_dev *pdev, instance->instancet->disable_intr(instance); megasas_destroy_irqs(instance); - if (instance->ctrl_context) + if (instance->adapter_type != MFI_SERIES) megasas_release_fusion(instance); else megasas_release_mfi(instance); @@ -6500,7 +6510,7 @@ megasas_resume(struct pci_dev *pdev) if (rval < 0) goto fail_reenable_msix; - if (instance->ctrl_context) { + if (instance->adapter_type != MFI_SERIES) { megasas_reset_reply_desc(instance); if (megasas_ioc_init_fusion(instance)) { megasas_free_cmds(instance); @@ -6684,7 +6694,7 @@ static void megasas_detach_one(struct pci_dev *pdev) } - if (instance->ctrl_context) { + if (instance->adapter_type != MFI_SERIES) { megasas_release_fusion(instance); pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * -- GitLab From fda0eab89ccf9d7a6f92d818847090ac4d268cda Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Thu, 19 Oct 2017 02:48:52 -0700 Subject: [PATCH 0144/1291] scsi: megaraid_sas: replace is_ventura with adapter_type checks commit f369a31578c461a360f58c7695e5aef931bada13 upstream. No functional change. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas.h | 1 - drivers/scsi/megaraid/megaraid_sas_base.c | 9 ++++---- drivers/scsi/megaraid/megaraid_sas_fp.c | 10 ++++----- drivers/scsi/megaraid/megaraid_sas_fusion.c | 24 ++++++++++----------- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 90b9b5d7f0f8..f3eddf93cf91 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2243,7 +2243,6 @@ struct megasas_instance { bool dev_handle; bool fw_sync_cache_support; u32 mfi_frame_size; - bool is_ventura; bool msix_combined; u16 max_raid_mapsize; /* preffered count to send as LDIO irrspective of FP capable.*/ diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index c2530f778c38..551a5b0375dd 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5220,7 +5220,7 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_ready_state; } - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { scratch_pad_3 = readl(&instance->reg_set->outbound_scratch_pad_3); instance->max_raid_mapsize = ((scratch_pad_3 >> @@ -5329,7 +5329,7 @@ static int megasas_init_fw(struct megasas_instance *instance) if (instance->instancet->init_adapter(instance)) goto fail_init_adapter; - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { scratch_pad_4 = readl(&instance->reg_set->outbound_scratch_pad_4); if ((scratch_pad_4 & MR_NVME_PAGE_SIZE_MASK) >= @@ -5365,7 +5365,7 @@ static int megasas_init_fw(struct megasas_instance *instance) memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); /* stream detection initialization */ - if (instance->is_ventura && fusion) { + if (instance->adapter_type == VENTURA_SERIES) { fusion->stream_detect_by_ld = kzalloc(sizeof(struct LD_STREAM_DETECT *) * MAX_LOGICAL_DRIVES_EXT, @@ -6094,7 +6094,6 @@ static int megasas_probe_one(struct pci_dev *pdev, break; case VENTURA_SERIES: - instance->is_ventura = 1; case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) { @@ -6686,7 +6685,7 @@ static void megasas_detach_one(struct pci_dev *pdev) if (instance->msix_vectors) pci_free_irq_vectors(instance->pdev); - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { for (i = 0; i < MAX_LOGICAL_DRIVES_EXT; ++i) kfree(fusion->stream_detect_by_ld[i]); kfree(fusion->stream_detect_by_ld); diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 5e18f25bd53e..f2ffde430ec1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -745,7 +745,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, *pDevHandle = MR_PdDevHandleGet(pd, map); *pPdInterface = MR_PdInterfaceTypeGet(pd, map); /* get second pd also for raid 1/10 fast path writes*/ - if (instance->is_ventura && + if ((instance->adapter_type == VENTURA_SERIES) && (raid->level == 1) && !io_info->isRead) { r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map); @@ -770,7 +770,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, } *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { ((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; io_info->span_arm = @@ -861,7 +861,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, *pDevHandle = MR_PdDevHandleGet(pd, map); *pPdInterface = MR_PdInterfaceTypeGet(pd, map); /* get second pd also for raid 1/10 fast path writes*/ - if (instance->is_ventura && + if ((instance->adapter_type == VENTURA_SERIES) && (raid->level == 1) && !io_info->isRead) { r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map); @@ -888,7 +888,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, } *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { ((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; io_info->span_arm = @@ -1099,7 +1099,7 @@ MR_BuildRaidContext(struct megasas_instance *instance, if (instance->adapter_type == INVADER_SERIES) pRAID_Context->reg_lock_flags = (isRead) ? raid->regTypeReqOnRead : raid->regTypeReqOnWrite; - else if (!instance->is_ventura) + else if (instance->adapter_type == THUNDERBOLT_SERIES) pRAID_Context->reg_lock_flags = (isRead) ? REGION_TYPE_SHARED_READ : raid->regTypeReqOnWrite; pRAID_Context->virtual_disk_tgt_id = raid->targetId; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ca7c235c506a..27f39c784c63 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -237,7 +237,7 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c reg_set = instance->reg_set; /* ventura FW does not fill outbound_scratch_pad_3 with queue depth */ - if (!instance->is_ventura) + if (instance->adapter_type < VENTURA_SERIES) cur_max_fw_cmds = readl(&instance->reg_set->outbound_scratch_pad_3) & 0x00FFFF; @@ -285,7 +285,7 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c instance->host->can_queue = instance->cur_can_queue; } - if (instance->is_ventura) + if (instance->adapter_type == VENTURA_SERIES) instance->max_mpt_cmds = instance->max_fw_cmds * RAID_1_PEER_CMDS; else @@ -2349,7 +2349,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, praid_context = &io_request->RaidContext; - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { spin_lock_irqsave(&instance->stream_lock, spinlock_flags); megasas_stream_detect(instance, cmd, &io_info); spin_unlock_irqrestore(&instance->stream_lock, spinlock_flags); @@ -2415,7 +2415,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, io_request->RaidContext.raid_context.reg_lock_flags |= (MR_RL_FLAGS_GRANT_DESTINATION_CUDA | MR_RL_FLAGS_SEQ_NUM_ENABLE); - } else if (instance->is_ventura) { + } else if (instance->adapter_type == VENTURA_SERIES) { io_request->RaidContext.raid_context_g35.nseg_type |= (1 << RAID_CONTEXT_NSEG_SHIFT); io_request->RaidContext.raid_context_g35.nseg_type |= @@ -2434,7 +2434,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, &io_info, local_map_ptr); scp->SCp.Status |= MEGASAS_LOAD_BALANCE_FLAG; cmd->pd_r1_lb = io_info.pd_after_lb; - if (instance->is_ventura) + if (instance->adapter_type == VENTURA_SERIES) io_request->RaidContext.raid_context_g35.span_arm = io_info.span_arm; else @@ -2444,7 +2444,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, } else scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; - if (instance->is_ventura) + if (instance->adapter_type == VENTURA_SERIES) cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle; else cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; @@ -2480,7 +2480,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, (MR_RL_FLAGS_GRANT_DESTINATION_CPU0 | MR_RL_FLAGS_SEQ_NUM_ENABLE); io_request->RaidContext.raid_context.nseg = 0x1; - } else if (instance->is_ventura) { + } else if (instance->adapter_type == VENTURA_SERIES) { io_request->RaidContext.raid_context_g35.routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); io_request->RaidContext.raid_context_g35.nseg_type |= @@ -2555,7 +2555,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, /* set RAID context values */ pRAID_Context->config_seq_num = raid->seqNum; - if (!instance->is_ventura) + if (instance->adapter_type != VENTURA_SERIES) pRAID_Context->reg_lock_flags = REGION_TYPE_SHARED_READ; pRAID_Context->timeout_value = cpu_to_le16(raid->fpIoTimeoutForLd); @@ -2640,7 +2640,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, cpu_to_le16(device_id + (MAX_PHYSICAL_DEVICES - 1)); pRAID_Context->config_seq_num = pd_sync->seq[pd_index].seqNum; io_request->DevHandle = pd_sync->seq[pd_index].devHandle; - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { io_request->RaidContext.raid_context_g35.routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); io_request->RaidContext.raid_context_g35.nseg_type |= @@ -2771,7 +2771,7 @@ megasas_build_io_fusion(struct megasas_instance *instance, return 1; } - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { set_num_sge(&io_request->RaidContext.raid_context_g35, sge_count); cpu_to_le16s(&io_request->RaidContext.raid_context_g35.routing_flags); cpu_to_le16s(&io_request->RaidContext.raid_context_g35.nseg_type); @@ -4233,7 +4233,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) for (i = 0 ; i < instance->max_scsi_cmds; i++) { cmd_fusion = fusion->cmd_list[i]; /*check for extra commands issued by driver*/ - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { r1_cmd = fusion->cmd_list[i + instance->max_fw_cmds]; megasas_return_cmd_fusion(instance, r1_cmd); } @@ -4334,7 +4334,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) megasas_set_dynamic_target_properties(sdev); /* reset stream detection array */ - if (instance->is_ventura) { + if (instance->adapter_type == VENTURA_SERIES) { for (j = 0; j < MAX_LOGICAL_DRIVES_EXT; ++j) { memset(fusion->stream_detect_by_ld[j], 0, sizeof(struct LD_STREAM_DETECT)); -- GitLab From c3aa570dd539e4e762cd59c0a6846a5aa946cfba Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Thu, 19 Oct 2017 02:48:54 -0700 Subject: [PATCH 0145/1291] scsi: megaraid_sas: Create separate functions to allocate ctrl memory commit 49a7a4adb0167b656b8dfb6ccb83220d553a1860 upstream. No functional change. Code refactoring to improve readability. Move the code to allocate and free controller memory into separate functions. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 122 ++++++++++++++-------- 1 file changed, 76 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 551a5b0375dd..14d5bd5fab15 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6016,6 +6016,75 @@ static inline void megasas_set_adapter_type(struct megasas_instance *instance) } } +static inline int megasas_alloc_mfi_ctrl_mem(struct megasas_instance *instance) +{ + instance->producer = pci_alloc_consistent(instance->pdev, sizeof(u32), + &instance->producer_h); + instance->consumer = pci_alloc_consistent(instance->pdev, sizeof(u32), + &instance->consumer_h); + + if (!instance->producer || !instance->consumer) { + dev_err(&instance->pdev->dev, + "Failed to allocate memory for producer, consumer\n"); + return -1; + } + + *instance->producer = 0; + *instance->consumer = 0; + return 0; +} + +/** + * megasas_alloc_ctrl_mem - Allocate per controller memory for core data + * structures which are not common across MFI + * adapters and fusion adapters. + * For MFI based adapters, allocate producer and + * consumer buffers. For fusion adapters, allocate + * memory for fusion context. + * @instance: Adapter soft state + * return: 0 for SUCCESS + */ +static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) +{ + switch (instance->adapter_type) { + case MFI_SERIES: + if (megasas_alloc_mfi_ctrl_mem(instance)) + return -ENOMEM; + break; + case VENTURA_SERIES: + case THUNDERBOLT_SERIES: + case INVADER_SERIES: + if (megasas_alloc_fusion_context(instance)) + return -ENOMEM; + break; + } + + return 0; +} + +/* + * megasas_free_ctrl_mem - Free fusion context for fusion adapters and + * producer, consumer buffers for MFI adapters + * + * @instance - Adapter soft instance + * + */ +static inline void megasas_free_ctrl_mem(struct megasas_instance *instance) +{ + if (instance->adapter_type == MFI_SERIES) { + if (instance->producer) + pci_free_consistent(instance->pdev, sizeof(u32), + instance->producer, + instance->producer_h); + if (instance->consumer) + pci_free_consistent(instance->pdev, sizeof(u32), + instance->consumer, + instance->consumer_h); + } else { + megasas_free_fusion_context(instance); + } +} + /** * megasas_probe_one - PCI hotplug entry point * @pdev: PCI device structure @@ -6074,33 +6143,8 @@ static int megasas_probe_one(struct pci_dev *pdev, megasas_set_adapter_type(instance); - switch (instance->adapter_type) { - case MFI_SERIES: - instance->producer = - pci_alloc_consistent(pdev, sizeof(u32), - &instance->producer_h); - instance->consumer = - pci_alloc_consistent(pdev, sizeof(u32), - &instance->consumer_h); - - if (!instance->producer || !instance->consumer) { - dev_printk(KERN_DEBUG, &pdev->dev, "Failed to allocate " - "memory for producer, consumer\n"); - goto fail_alloc_dma_buf; - } - - *instance->producer = 0; - *instance->consumer = 0; - - break; - case VENTURA_SERIES: - case THUNDERBOLT_SERIES: - case INVADER_SERIES: - if (megasas_alloc_fusion_context(instance)) { - megasas_free_fusion_context(instance); - goto fail_alloc_dma_buf; - } - } + if (megasas_alloc_ctrl_mem(instance)) + goto fail_alloc_dma_buf; /* Crash dump feature related initialisation*/ instance->drv_buf_index = 0; @@ -6296,12 +6340,7 @@ static int megasas_probe_one(struct pci_dev *pdev, pci_free_consistent(pdev, sizeof(struct MR_TARGET_PROPERTIES), instance->tgt_prop, instance->tgt_prop_h); - if (instance->producer) - pci_free_consistent(pdev, sizeof(u32), instance->producer, - instance->producer_h); - if (instance->consumer) - pci_free_consistent(pdev, sizeof(u32), instance->consumer, - instance->consumer_h); + megasas_free_ctrl_mem(instance); scsi_host_put(host); fail_alloc_instance: @@ -6572,12 +6611,8 @@ megasas_resume(struct pci_dev *pdev) pci_free_consistent(pdev, sizeof(struct MR_TARGET_PROPERTIES), instance->tgt_prop, instance->tgt_prop_h); - if (instance->producer) - pci_free_consistent(pdev, sizeof(u32), instance->producer, - instance->producer_h); - if (instance->consumer) - pci_free_consistent(pdev, sizeof(u32), instance->consumer, - instance->consumer_h); + + megasas_free_ctrl_mem(instance); scsi_host_put(host); fail_set_dma_mask: @@ -6718,15 +6753,8 @@ static void megasas_detach_one(struct pci_dev *pdev) fusion->pd_seq_sync[i], fusion->pd_seq_phys[i]); } - megasas_free_fusion_context(instance); } else { megasas_release_mfi(instance); - pci_free_consistent(pdev, sizeof(u32), - instance->producer, - instance->producer_h); - pci_free_consistent(pdev, sizeof(u32), - instance->consumer, - instance->consumer_h); } kfree(instance->ctrl_info); @@ -6767,6 +6795,8 @@ static void megasas_detach_one(struct pci_dev *pdev) pci_free_consistent(pdev, sizeof(struct MR_DRV_SYSTEM_INFO), instance->system_info_buf, instance->system_info_h); + megasas_free_ctrl_mem(instance); + scsi_host_put(host); pci_disable_device(pdev); -- GitLab From e58114824fa60ea0ac841a799eaa761fe8c6bd9a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:40 +0800 Subject: [PATCH 0146/1291] scsi: megaraid_sas: fix selection of reply queue commit adbe552349f2d1e48357a00e564d26135e586634 upstream. Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") we could end up with an MSI-X vector that did not have any online CPUs mapped. This would lead to I/O hangs since there was no CPU to receive the completion. Retrieve IRQ affinity information using pci_irq_get_affinity() and use this mapping to choose a reply queue. [mkp: tweaked commit desc] Cc: Hannes Reinecke Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Laurence Oberman Cc: Mike Snitzer Cc: Meelis Roos Cc: Artem Bityutskiy Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Acked-by: Kashyap Desai Tested-by: Kashyap Desai Reviewed-by: Christoph Hellwig Tested-by: Artem Bityutskiy Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas.h | 1 + drivers/scsi/megaraid/megaraid_sas_base.c | 39 +++++++++++++++++++-- drivers/scsi/megaraid/megaraid_sas_fusion.c | 12 +++---- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index f3eddf93cf91..81de4a1fbb9b 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2099,6 +2099,7 @@ enum MR_PD_TYPE { struct megasas_instance { + unsigned int *reply_map; __le32 *producer; dma_addr_t producer_h; __le32 *consumer; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 14d5bd5fab15..985378e4bb6f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5138,6 +5138,26 @@ megasas_setup_jbod_map(struct megasas_instance *instance) instance->use_seqnum_jbod_fp = false; } +static void megasas_setup_reply_map(struct megasas_instance *instance) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < instance->msix_vectors; queue++) { + mask = pci_irq_get_affinity(instance->pdev, queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + instance->reply_map[cpu] = queue; + } + return; + +fallback: + for_each_possible_cpu(cpu) + instance->reply_map[cpu] = cpu % instance->msix_vectors; +} + /** * megasas_init_fw - Initializes the FW * @instance: Adapter soft state @@ -5303,6 +5323,8 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_setup_irqs; } + megasas_setup_reply_map(instance); + dev_info(&instance->pdev->dev, "firmware supports msix\t: (%d)", fw_msix_count); dev_info(&instance->pdev->dev, @@ -6046,20 +6068,29 @@ static inline int megasas_alloc_mfi_ctrl_mem(struct megasas_instance *instance) */ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) { + instance->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids, + GFP_KERNEL); + if (!instance->reply_map) + return -ENOMEM; + switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) - return -ENOMEM; + goto fail; break; case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) - return -ENOMEM; + goto fail; break; } return 0; + fail: + kfree(instance->reply_map); + instance->reply_map = NULL; + return -ENOMEM; } /* @@ -6071,6 +6102,7 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) */ static inline void megasas_free_ctrl_mem(struct megasas_instance *instance) { + kfree(instance->reply_map); if (instance->adapter_type == MFI_SERIES) { if (instance->producer) pci_free_consistent(instance->pdev, sizeof(u32), @@ -6342,7 +6374,6 @@ static int megasas_probe_one(struct pci_dev *pdev, instance->tgt_prop_h); megasas_free_ctrl_mem(instance); scsi_host_put(host); - fail_alloc_instance: fail_set_dma_mask: pci_disable_device(pdev); @@ -6548,6 +6579,8 @@ megasas_resume(struct pci_dev *pdev) if (rval < 0) goto fail_reenable_msix; + megasas_setup_reply_map(instance); + if (instance->adapter_type != MFI_SERIES) { megasas_reset_reply_desc(instance); if (megasas_ioc_init_fusion(instance)) { diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 27f39c784c63..d8f626567f59 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2341,11 +2341,8 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = (io_info.fpOkForIo > 0) ? true : false; } - /* Use raw_smp_processor_id() for now until cmd->request->cpu is CPU - id by default, not CPU group id, otherwise all MSI-X queues won't - be utilized */ - cmd->request_desc->SCSIIO.MSIxIndex = instance->msix_vectors ? - raw_smp_processor_id() % instance->msix_vectors : 0; + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; praid_context = &io_request->RaidContext; @@ -2667,10 +2664,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, } cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle; - cmd->request_desc->SCSIIO.MSIxIndex = - instance->msix_vectors ? - (raw_smp_processor_id() % instance->msix_vectors) : 0; + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; if (!fp_possible) { /* system pd firmware path */ -- GitLab From 11c6be539e1a40fb508b17810e7336d5cbc7b579 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 6 Jul 2018 15:14:11 +0800 Subject: [PATCH 0147/1291] ALSA: hda/realtek - two more lenovo models need fixup of MIC_LOCATION commit c6b17f1020d956f4113d478cae6171b9093817ba upstream. We have two new lenovo desktop models which need to apply the fixup of ALC294_FIXUP_LENOVO_MIC_LOCATION, and they have the same pin cfg as the machine with subsystem id:0x17aa3136, now use the pincfg table to apply the fixup for them. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 02157e3d82bb..bf7737fc3b28 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6445,7 +6445,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), - SND_PCI_QUIRK(0x17aa, 0x3136, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), @@ -6628,6 +6627,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x02a11040}, {0x1b, 0x01014020}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, + {0x14, 0x90170110}, + {0x19, 0x02a11020}, + {0x1a, 0x02a11030}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60140}, {0x14, 0x90170150}, -- GitLab From f329f46764b116fa3809113d1c14fc49bde27519 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 07:25:32 +0100 Subject: [PATCH 0148/1291] ALSA: hda - Handle pm failure during hotplug commit aaa23f86001bdb82d2f937c5c7bce0a1e11a6c5b upstream. Obtaining the runtime pm wakeref can fail, especially in a hotplug scenario where i915.ko has been unloaded. If we do not catch the failure, we end up with an unbalanced pm. v2 additions by tiwai: hdmi_present_sense() checks the return value and handle only a negative error case and bails out only if it's really still suspended. Also, snd_hda_power_down() is called at the error path so that the refcount is balanced. Along with it, the spec->pcm_lock is taken outside hdmi_present_sense() in the caller side, so that it won't cause deadlock at reentrace via runtime resume. v3 fix by tiwai: Missing linux/pm_runtime.h is included. References: 222bde03881c ("ALSA: hda - Fix mutex deadlock at HDMI/DP hotplug") Signed-off-by: Chris Wilson Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 7d7eb1354eee..ffb6aba71998 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -764,8 +765,10 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid, if (pin_idx < 0) return; + mutex_lock(&spec->pcm_lock); if (hdmi_present_sense(get_pin(spec, pin_idx), 1)) snd_hda_jack_report_sync(codec); + mutex_unlock(&spec->pcm_lock); } static void jack_callback(struct hda_codec *codec, @@ -1628,21 +1631,23 @@ static void sync_eld_via_acomp(struct hda_codec *codec, static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) { struct hda_codec *codec = per_pin->codec; - struct hdmi_spec *spec = codec->spec; int ret; /* no temporary power up/down needed for component notifier */ - if (!codec_has_acomp(codec)) - snd_hda_power_up_pm(codec); + if (!codec_has_acomp(codec)) { + ret = snd_hda_power_up_pm(codec); + if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) { + snd_hda_power_down_pm(codec); + return false; + } + } - mutex_lock(&spec->pcm_lock); if (codec_has_acomp(codec)) { sync_eld_via_acomp(codec, per_pin); ret = false; /* don't call snd_hda_jack_report_sync() */ } else { ret = hdmi_present_sense_via_verbs(per_pin, repoll); } - mutex_unlock(&spec->pcm_lock); if (!codec_has_acomp(codec)) snd_hda_power_down_pm(codec); @@ -1654,12 +1659,16 @@ static void hdmi_repoll_eld(struct work_struct *work) { struct hdmi_spec_per_pin *per_pin = container_of(to_delayed_work(work), struct hdmi_spec_per_pin, work); + struct hda_codec *codec = per_pin->codec; + struct hdmi_spec *spec = codec->spec; if (per_pin->repoll_count++ > 6) per_pin->repoll_count = 0; + mutex_lock(&spec->pcm_lock); if (hdmi_present_sense(per_pin, per_pin->repoll_count)) snd_hda_jack_report_sync(per_pin->codec); + mutex_unlock(&spec->pcm_lock); } static void intel_haswell_fixup_connect_list(struct hda_codec *codec, -- GitLab From 684a2d8ed53f162dc731b117cd71239b77c47450 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 Jul 2018 16:58:52 -0700 Subject: [PATCH 0149/1291] mm: do not drop unused pages when userfaultd is running commit bce73e4842390f7b7309c8e253e139db71288ac3 upstream. KVM guests on s390 can notify the host of unused pages. This can result in pte_unused callbacks to be true for KVM guest memory. If a page is unused (checked with pte_unused) we might drop this page instead of paging it. This can have side-effects on userfaultd, when the page in question was already migrated: The next access of that page will trigger a fault and a user fault instead of faulting in a new and empty zero page. As QEMU does not expect a userfault on an already migrated page this migration will fail. The most straightforward solution is to ignore the pte_unused hint if a userfault context is active for this VMA. Link: http://lkml.kernel.org/r/20180703171854.63981-1-borntraeger@de.ibm.com Signed-off-by: Christian Borntraeger Cc: Martin Schwidefsky Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Janosch Frank Cc: David Hildenbrand Cc: Cornelia Huck Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/rmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index b874c4761e84..97edcf44d88c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -64,6 +64,7 @@ #include #include #include +#include #include @@ -1476,11 +1477,16 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } - } else if (pte_unused(pteval)) { + } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. + * A future reference will then fault in a new zero + * page. When userfaultfd is active, we must not drop + * this page though, as its main user (postcopy + * migration) will not expect userfaults on already + * copied pages. */ dec_mm_counter(mm, mm_counter(page)); } else if (IS_ENABLED(CONFIG_MIGRATION) && -- GitLab From e6f011384c922015e5a973a60cf565b543a18c22 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Jul 2018 16:58:56 -0700 Subject: [PATCH 0150/1291] fs/proc/task_mmu.c: fix Locked field in /proc/pid/smaps* commit e70cc2bd579e8a9d6d153762f0fe294d0e652ff0 upstream. Thomas reports: "While looking around in /proc on my v4.14.52 system I noticed that all processes got a lot of "Locked" memory in /proc/*/smaps. A lot more memory than a regular user can usually lock with mlock(). Commit 493b0e9d945f (in v4.14-rc1) seems to have changed the behavior of "Locked". Before that commit the code was like this. Notice the VM_LOCKED check. (vma->vm_flags & VM_LOCKED) ? (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); After that commit Locked is now the same as Pss: (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); This looks like a mistake." Indeed, the commit has added mss->pss_locked with the correct value that depends on VM_LOCKED, but forgot to actually use it. Fix it. Link: http://lkml.kernel.org/r/ebf6c7fb-fec3-6a26-544f-710ed193c154@suse.cz Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Vlastimil Babka Reported-by: Thomas Lindroth Cc: Alexey Dobriyan Cc: Daniel Colascione Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4cd8328e4039..6f337fff38c4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -850,7 +850,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss->private_hugetlb >> 10, mss->swap >> 10, (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)), - (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); + (unsigned long)(mss->pss_locked >> (10 + PSS_SHIFT))); if (!rollup_mode) { arch_show_smap(m, vma); -- GitLab From ff62981880d165c09957e68f147067b7fd7d5bbd Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 13 Jul 2018 16:59:13 -0700 Subject: [PATCH 0151/1291] fs, elf: make sure to page align bss in load_elf_library commit 24962af7e1041b7e50c1bc71d8d10dc678c556b5 upstream. The current code does not make sure to page align bss before calling vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to the requested lenght not being correctly aligned. Let us make sure to align it properly. Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured for libc5. Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net Signed-off-by: Oscar Salvador Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com Tested-by: Tetsuo Handa Acked-by: Kees Cook Cc: Michal Hocko Cc: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 73b01e474fdc..c0e3f91e28e9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1235,9 +1235,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (error) -- GitLab From 81ebc9decd164fa4a874767e99ae65aec571dd8c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:20 -0700 Subject: [PATCH 0152/1291] mm: do not bug_on on incorrect length in __mm_populate() commit bb177a732c4369bb58a1fe1df8f552b6f0f7db5f upstream. syzbot has noticed that a specially crafted library can easily hit VM_BUG_ON in __mm_populate kernel BUG at mm/gup.c:1242! invalid opcode: 0000 [#1] SMP CPU: 2 PID: 9667 Comm: a.out Not tainted 4.18.0-rc3 #644 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 RIP: 0010:__mm_populate+0x1e2/0x1f0 Code: 55 d0 65 48 33 14 25 28 00 00 00 89 d8 75 21 48 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 75 18 f1 ff 0f 0b e8 6e 18 f1 ff <0f> 0b 31 db eb c9 e8 93 06 e0 ff 0f 1f 00 55 48 89 e5 53 48 89 fb Call Trace: vm_brk_flags+0xc3/0x100 vm_brk+0x1f/0x30 load_elf_library+0x281/0x2e0 __ia32_sys_uselib+0x170/0x1e0 do_fast_syscall_32+0xca/0x420 entry_SYSENTER_compat+0x70/0x7f The reason is that the length of the new brk is not page aligned when we try to populate the it. There is no reason to bug on that though. do_brk_flags already aligns the length properly so the mapping is expanded as it should. All we need is to tell mm_populate about it. Besides that there is absolutely no reason to to bug_on in the first place. The worst thing that could happen is that the last page wouldn't get populated and that is far from putting system into an inconsistent state. Fix the issue by moving the length sanitization code from do_brk_flags up to vm_brk_flags. The only other caller of do_brk_flags is brk syscall entry and it makes sure to provide the proper length so t here is no need for sanitation and so we can use do_brk_flags without it. Also remove the bogus BUG_ONs. [osalvador@techadventures.net: fix up vm_brk_flags s@request@len@] Link: http://lkml.kernel.org/r/20180706090217.GI32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: syzbot Tested-by: Tetsuo Handa Reviewed-by: Oscar Salvador Cc: Zi Yan Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: "Kirill A. Shutemov" Cc: Michael S. Tsirkin Cc: Al Viro Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/gup.c | 2 -- mm/mmap.c | 29 ++++++++++++----------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 72c921da0f3b..4cc8a6ff0f56 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1235,8 +1235,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) int locked = 0; long ret = 0; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; for (nstart = start; nstart < end; nstart = nend) { diff --git a/mm/mmap.c b/mm/mmap.c index f858b1f336af..2398776195d2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -177,8 +177,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf); - +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, + struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long retval; @@ -236,7 +236,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; /* Ok, looks good - let it rip. */ - if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0) + if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0) goto out; set_brk: @@ -2887,21 +2887,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf) +static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(request); - if (len < request) - return -ENOMEM; - if (!len) - return 0; - /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; @@ -2973,18 +2966,20 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long return 0; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf) -{ - return do_brk_flags(addr, len, 0, uf); -} - -int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) +int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; + unsigned long len; int ret; bool populate; LIST_HEAD(uf); + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; + if (!len) + return 0; + if (down_write_killable(&mm->mmap_sem)) return -EINTR; -- GitLab From 36244e3a60cf44c0cb17fc1f3e4498479316bc9d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 25 Jun 2018 17:08:22 -0700 Subject: [PATCH 0153/1291] tracing: Reorder display of TGID to be after PID commit f8494fa3dd10b52eab47a9666a8bc34719a129aa upstream. Currently ftrace displays data in trace output like so: _-----=> irqs-off / _----=> need-resched | / _---=> hardirq/softirq || / _--=> preempt-depth ||| / delay TASK-PID CPU TGID |||| TIMESTAMP FUNCTION | | | | |||| | | bash-1091 [000] ( 1091) d..2 28.313544: sched_switch: However Android's trace visualization tools expect a slightly different format due to an out-of-tree patch patch that was been carried for a decade, notice that the TGID and CPU fields are reversed: _-----=> irqs-off / _----=> need-resched | / _---=> hardirq/softirq || / _--=> preempt-depth ||| / delay TASK-PID TGID CPU |||| TIMESTAMP FUNCTION | | | | |||| | | bash-1091 ( 1091) [002] d..2 64.965177: sched_switch: From kernel v4.13 onwards, during which TGID was introduced, tracing with systrace on all Android kernels will break (most Android kernels have been on 4.9 with Android patches, so this issues hasn't been seen yet). From v4.13 onwards things will break. The chrome browser's tracing tools also embed the systrace viewer which uses the legacy TGID format and updates to that are known to be difficult to make. Considering this, I suggest we make this change to the upstream kernel and backport it to all Android kernels. I believe this feature is merged recently enough into the upstream kernel that it shouldn't be a problem. Also logically, IMO it makes more sense to group the TGID with the TASK-PID and the CPU after these. Link: http://lkml.kernel.org/r/20180626000822.113931-1-joel@joelfernandes.org Cc: jreck@google.com Cc: tkjos@google.com Cc: stable@vger.kernel.org Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_output.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 520ecaf61dc4..e268750bd4ad 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3359,8 +3359,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | | %s | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, @@ -3380,9 +3380,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file tgid ? tgid_space : space); seq_printf(m, "# %s||| / delay\n", tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", + seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : space); - seq_printf(m, "# | | | %s|||| | |\n", + seq_printf(m, "# | | %s | |||| | |\n", tgid ? " | " : space); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c738e764e2a5..4500b00e4e36 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); @@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter) trace_seq_printf(s, "(%5d) ", tgid); } + trace_seq_printf(s, "[%03d] ", iter->cpu); + if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); -- GitLab From 324881805629d163a1dae9f05da3b57ee1bf4686 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jul 2018 12:59:16 -0700 Subject: [PATCH 0154/1291] kbuild: delete INSTALL_FW_PATH from kbuild documentation commit 3f9cdee5929b7d035e86302dcf08fbf3e80b0739 upstream. Removed Kbuild documentation for INSTALL_FW_PATH. The kbuild symbol INSTALL_FW_PATH was removed from Kbuild tools in September 2017 (for 4.14) but the symbol was not deleted from the kbuild documentation, so do that now. Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware") Signed-off-by: Randy Dunlap Cc: stable@vger.kernel.org # 4.14+ Cc: Greg Kroah-Hartman Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- Documentation/kbuild/kbuild.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index ac2363ea05c5..82afdb7f0816 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -152,15 +152,6 @@ stripped after they are installed. If INSTALL_MOD_STRIP is '1', then the default option --strip-debug will be used. Otherwise, INSTALL_MOD_STRIP value will be used as the options to the strip command. -INSTALL_FW_PATH --------------------------------------------------- -INSTALL_FW_PATH specifies where to install the firmware blobs. -The default value is: - - $(INSTALL_MOD_PATH)/lib/firmware - -The value can be overridden in which case the default value is ignored. - INSTALL_HDR_PATH -------------------------------------------------- INSTALL_HDR_PATH specifies where to install user space headers when -- GitLab From d8148f7327488cbeaf5448ed839344b4b3de6efd Mon Sep 17 00:00:00 2001 From: Yandong Zhao Date: Wed, 11 Jul 2018 19:06:28 +0800 Subject: [PATCH 0155/1291] arm64: neon: Fix function may_use_simd() return error status commit 2fd8eb4ad87104c54800ef3cea498c92eb15c78a upstream. It does not matter if the caller of may_use_simd() migrates to another cpu after the call, but it is still important that the kernel_neon_busy percpu instance that is read matches the cpu the task is running on at the time of the read. This means that raw_cpu_read() is not sufficient. kernel_neon_busy may appear true if the caller migrates during the execution of raw_cpu_read() and the next task to be scheduled in on the initial cpu calls kernel_neon_begin(). This patch replaces raw_cpu_read() with this_cpu_read() to protect against this race. Cc: Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Acked-by: Ard Biesheuvel Reviewed-by: Dave Martin Reviewed-by: Mark Rutland Signed-off-by: Yandong Zhao Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/simd.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index fa8b3fe932e6..6495cc51246f 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -29,20 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * The raw_cpu_read() is racy if called with preemption enabled. - * This is not a bug: kernel_neon_busy is only set when - * preemption is disabled, so we cannot migrate to another CPU - * while it is set, nor can we migrate to a CPU where it is set. - * So, if we find it clear on some CPU then we're guaranteed to - * find it clear on any CPU we could migrate to. - * - * If we are in between kernel_neon_begin()...kernel_neon_end(), - * the flag will be set, but preemption is also disabled, so we - * can't migrate to another CPU and spuriously see it become - * false. + * kernel_neon_busy is only set while preemption is disabled, + * and is clear whenever preemption is enabled. Since + * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * cannot change under our feet -- if it's set we cannot be + * migrated, and if it's clear we cannot be migrated to a CPU + * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !raw_cpu_read(kernel_neon_busy); + !this_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ -- GitLab From e61f8cb63ededc7519afc32ffc94ffc68a83bf5f Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 5 Jun 2018 19:00:22 +0200 Subject: [PATCH 0156/1291] tools build: fix # escaping in .cmd files for future Make commit 9feeb638cde083c737e295c0547f1b4f28e99583 upstream. In 2016 GNU Make made a backwards incompatible change to the way '#' characters were handled in Makefiles when used inside functions or macros: http://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57 Due to this change, when attempting to run `make prepare' I get a spurious make syntax error: /home/earnest/linux/tools/objtool/.fixdep.o.cmd:1: *** missing separator. Stop. When inspecting `.fixdep.o.cmd' it includes two lines which use unescaped comment characters at the top: \# cannot find fixdep (/home/earnest/linux/tools/objtool//fixdep) \# using basic dep data This is because `tools/build/Build.include' prints these '\#' characters: printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ This completes commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files for future Make"). Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847 Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: stable@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- tools/build/Build.include | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index a4bbb984941d..d9048f145f97 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)), $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd, \ - printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ - printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd; \ cat $(depfile) >> $(dot-target).cmd; \ printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) -- GitLab From ac5270d4bd46e1bfd44d4d907a6c14cfed84c38b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 20 Jun 2018 09:29:08 -0700 Subject: [PATCH 0157/1291] IB/hfi1: Fix incorrect mixing of ERR_PTR and NULL return values commit b697d7d8c741f27b728a878fc55852b06d0f6f5e upstream. The __get_txreq() function can return a pointer, ERR_PTR(-EBUSY), or NULL. All of the relevant call sites look for IS_ERR, so the NULL return would lead to a NULL pointer exception. Do not use the ERR_PTR mechanism for this function. Update all call sites to handle the return value correctly. Clean up error paths to reflect return value. Fixes: 45842abbb292 ("staging/rdma/hfi1: move txreq header code") Cc: # 4.9.x+ Reported-by: Dan Carpenter Reviewed-by: Mike Marciniszyn Reviewed-by: Kamenee Arumugam Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/uc.c | 4 ++-- drivers/infiniband/hw/hfi1/ud.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs_txreq.h | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 84c6a6ff4a67..818bac1a4056 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -273,7 +273,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0b646173ca22..92e033fbb048 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -72,7 +72,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 38c7d9c456fe..37abd150fad3 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -479,7 +479,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) u32 lid; ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 873e48ea923f..c4ab2d5b4502 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -94,7 +94,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) __must_hold(&qp->s_lock) { - struct verbs_txreq *tx = ERR_PTR(-EBUSY); + struct verbs_txreq *tx = NULL; write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 76216f2ef35a..22fc5ddf01ca 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -83,7 +83,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, if (unlikely(!tx)) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); - if (IS_ERR(tx)) + if (!tx) return tx; } tx->qp = qp; -- GitLab From 33beaca902a67b425cd0fc0fe2ba41cb9e1a0aaf Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 3 Jul 2018 09:55:43 +0100 Subject: [PATCH 0158/1291] i2c: tegra: Fix NACK error handling commit 54836e2d03e76d80aec3399368ffaf5b7caadd1b upstream. On Tegra30 Cardhu the PCA9546 I2C mux is not ACK'ing I2C commands on resume from suspend (which is caused by the reset signal for the I2C mux not being configured correctl). However, this NACK is causing the Tegra30 to hang on resuming from suspend which is not expected as we detect NACKs and handle them. The hang observed appears to occur when resetting the I2C controller to recover from the NACK. Commit 77821b4678f9 ("i2c: tegra: proper handling of error cases") added additional error handling for some error cases including NACK, however, it appears that this change conflicts with an early fix by commit f70893d08338 ("i2c: tegra: Add delay before resetting the controller after NACK"). After commit 77821b4678f9 was made we now disable 'packet mode' before the delay from commit f70893d08338 happens. Testing shows that moving the delay to before disabling 'packet mode' fixes the hang observed on Tegra30. The delay was added to give the I2C controller chance to send a stop condition and so it makes sense to move this to before we disable packet mode. Please note that packet mode is always enabled for Tegra. Fixes: 77821b4678f9 ("i2c: tegra: proper handling of error cases") Signed-off-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-tegra.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 60292d243e24..ec2d11af6c78 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -547,6 +547,14 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) { u32 cnfg; + /* + * NACK interrupt is generated before the I2C controller generates + * the STOP condition on the bus. So wait for 2 clock periods + * before disabling the controller so that the STOP condition has + * been delivered properly. + */ + udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); + cnfg = i2c_readl(i2c_dev, I2C_CNFG); if (cnfg & I2C_CNFG_PACKET_MODE_EN) i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG); @@ -708,15 +716,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (likely(i2c_dev->msg_err == I2C_ERR_NONE)) return 0; - /* - * NACK interrupt is generated before the I2C controller generates - * the STOP condition on the bus. So wait for 2 clock periods - * before resetting the controller so that the STOP condition has - * been delivered properly. - */ - if (i2c_dev->msg_err == I2C_ERR_NO_ACK) - udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); - tegra_i2c_init(i2c_dev); if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { if (msg->flags & I2C_M_IGNORE_NAK) -- GitLab From f47f1f976745a9dc9f77c36018a5ff278207be5b Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 21 Jun 2018 07:43:21 -0700 Subject: [PATCH 0159/1291] iw_cxgb4: correctly enforce the max reg_mr depth commit 7b72717a20bba8bdd01b14c0460be7d15061cd6b upstream. The code was mistakenly using the length of the page array memory instead of the depth of the page array. This would cause MR creation to fail in some cases. Fixes: 8376b86de7d3 ("iw_cxgb4: Support the new memory registration API") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index c2fba76becd4..b5784cb145f5 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -720,7 +720,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + if (unlikely(mhp->mpl_len == mhp->attr.pbl_size)) return -ENOMEM; mhp->mpl[mhp->mpl_len++] = addr; -- GitLab From 54ca2776fccae337d0538b6b10811476cf669df1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Jul 2018 17:40:34 +0200 Subject: [PATCH 0160/1291] xen: setup pv irq ops vector earlier commit 0ce0bba4e5e0eb9b753bb821785de5d23c494392 upstream. Setting pv_irq_ops for Xen PV domains should be done as early as possible in order to support e.g. very early printk() usage. The same applies to xen_vcpu_info_reset(0), as it is needed for the pv irq ops. Move the call of xen_setup_machphys_mapping() after initializing the pv functions as it contains a WARN_ON(), too. Remove the no longer necessary conditional in xen_init_irq_ops() from PVH V1 times to make clear this is a PV only function. Cc: # 4.14 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pv.c | 24 +++++++++++------------- arch/x86/xen/irq.c | 4 +--- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index fcd8789470d1..fd173e6425cc 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1230,12 +1230,20 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_features(); - xen_setup_machphys_mapping(); - /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops.patch = paravirt_patch_default; pv_cpu_ops = xen_cpu_ops; + xen_init_irq_ops(); + + /* + * Setup xen_vcpu early because it is needed for + * local_irq_disable(), irqs_disabled(), e.g. in printk(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); x86_platform.get_nmi_reason = xen_get_nmi_reason; @@ -1247,6 +1255,7 @@ asmlinkage __visible void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_setup_machphys_mapping(); xen_init_mmu_ops(); /* Prevent unwanted bits from being set in PTEs. */ @@ -1271,20 +1280,9 @@ asmlinkage __visible void __init xen_start_kernel(void) get_cpu_cap(&boot_cpu_data); x86_configure_nx(); - xen_init_irq_ops(); - /* Let's presume PV guests always boot on vCPU with id 0. */ per_cpu(xen_vcpu_id, 0) = 0; - /* - * Setup xen_vcpu early because idt_setup_early_handler needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - idt_setup_early_handler(); xen_init_capabilities(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 74179852e46c..7515a19fd324 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - /* For PVH we use default pv_irq_ops settings. */ - if (!xen_feature(XENFEAT_hvm_callback_vector)) - pv_irq_ops = xen_irq_ops; + pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } -- GitLab From 2a017ea2ea1448d8d9505f7589a537c2eed64f31 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 13 Feb 2018 05:44:44 -0700 Subject: [PATCH 0161/1291] nvme-pci: Remap CMB SQ entries on every controller reset commit 815c6704bf9f1c59f3a6be380a4032b9c57b12f1 upstream. The controller memory buffer is remapped into a kernel address on each reset, but the driver was setting the submission queue base address only on the very first queue creation. The remapped address is likely to change after a reset, so accessing the old address will hit a kernel bug. This patch fixes that by setting the queue's CMB base address each time the queue is created. Fixes: f63572dff1421 ("nvme: unmap CMB and remove sysfs file in reset path") Reported-by: Christian Black Cc: Jon Derrick Cc: # 4.9+ Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3d4724e38aa9..4cac4755abef 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1233,17 +1233,15 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, int qid, int depth) { - if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { - unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), - dev->ctrl.page_size); - nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; - nvmeq->sq_cmds_io = dev->cmb + offset; - } else { - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), - &nvmeq->sq_dma_addr, GFP_KERNEL); - if (!nvmeq->sq_cmds) - return -ENOMEM; - } + + /* CMB SQEs will be mapped before creation */ + if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) + return 0; + + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), + &nvmeq->sq_dma_addr, GFP_KERNEL); + if (!nvmeq->sq_cmds) + return -ENOMEM; return 0; } @@ -1320,6 +1318,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) struct nvme_dev *dev = nvmeq->dev; int result; + if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { + unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), + dev->ctrl.page_size); + nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; + nvmeq->sq_cmds_io = dev->cmb + offset; + } + nvmeq->cq_vector = qid - 1; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) -- GitLab From 19f39eff68b40746703285ee55209041fedca6b3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 26 May 2018 00:08:58 -0700 Subject: [PATCH 0162/1291] crypto: x86/salsa20 - remove x86 salsa20 implementations commit b7b73cd5d74694ed59abcdb4974dacb4ff8b2a2a upstream. The x86 assembly implementations of Salsa20 use the frame base pointer register (%ebp or %rbp), which breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Recent (v4.10+) kernels will warn about this, e.g. WARNING: kernel stack regs at 00000000a8291e69 in syzkaller047086:4677 has bad 'bp' value 000000001077994c [...] But after looking into it, I believe there's very little reason to still retain the x86 Salsa20 code. First, these are *not* vectorized (SSE2/SSSE3/AVX2) implementations, which would be needed to get anywhere close to the best Salsa20 performance on any remotely modern x86 processor; they're just regular x86 assembly. Second, it's still unclear that anyone is actually using the kernel's Salsa20 at all, especially given that now ChaCha20 is supported too, and with much more efficient SSSE3 and AVX2 implementations. Finally, in benchmarks I did on both Intel and AMD processors with both gcc 8.1.0 and gcc 4.9.4, the x86_64 salsa20-asm is actually slightly *slower* than salsa20-generic (~3% slower on Skylake, ~10% slower on Zen), while the i686 salsa20-asm is only slightly faster than salsa20-generic (~15% faster on Skylake, ~20% faster on Zen). The gcc version made little difference. So, the x86_64 salsa20-asm is pretty clearly useless. That leaves just the i686 salsa20-asm, which based on my tests provides a 15-20% speed boost. But that's without updating the code to not use %ebp. And given the maintenance cost, the small speed difference vs. salsa20-generic, the fact that few people still use i686 kernels, the doubt that anyone is even using the kernel's Salsa20 at all, and the fact that a SSE2 implementation would almost certainly be much faster on any remotely modern x86 processor yet no one has cared enough to add one yet, I don't think it's worthwhile to keep. Thus, just remove both the x86_64 and i686 salsa20-asm implementations. Reported-by: syzbot+ffa3a158337bbc01ff09@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/Makefile | 4 - arch/x86/crypto/salsa20-i586-asm_32.S | 1114 ----------------------- arch/x86/crypto/salsa20-x86_64-asm_64.S | 919 ------------------- arch/x86/crypto/salsa20_glue.c | 116 --- crypto/Kconfig | 26 - 5 files changed, 2179 deletions(-) delete mode 100644 arch/x86/crypto/salsa20-i586-asm_32.S delete mode 100644 arch/x86/crypto/salsa20-x86_64-asm_64.S delete mode 100644 arch/x86/crypto/salsa20_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 5f07333bb224..9c903a420cda 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o -obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o @@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o -obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -59,7 +57,6 @@ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o -salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o @@ -68,7 +65,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o -salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S deleted file mode 100644 index 329452b8f794..000000000000 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ /dev/null @@ -1,1114 +0,0 @@ -# salsa20_pm.s version 20051229 -# D. J. Bernstein -# Public domain. - -#include - -.text - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,80(%esp) - # ebx_stack = ebx - movl %ebx,84(%esp) - # esi_stack = esi - movl %esi,88(%esp) - # edi_stack = edi - movl %edi,92(%esp) - # ebp_stack = ebp - movl %ebp,96(%esp) - # x = arg1 - movl 4(%esp,%eax),%edx - # m = arg2 - movl 8(%esp,%eax),%esi - # out = arg3 - movl 12(%esp,%eax),%edi - # bytes = arg4 - movl 16(%esp,%eax),%ebx - # bytes -= 0 - sub $0,%ebx - # goto done if unsigned<= - jbe ._done -._start: - # in0 = *(uint32 *) (x + 0) - movl 0(%edx),%eax - # in1 = *(uint32 *) (x + 4) - movl 4(%edx),%ecx - # in2 = *(uint32 *) (x + 8) - movl 8(%edx),%ebp - # j0 = in0 - movl %eax,164(%esp) - # in3 = *(uint32 *) (x + 12) - movl 12(%edx),%eax - # j1 = in1 - movl %ecx,168(%esp) - # in4 = *(uint32 *) (x + 16) - movl 16(%edx),%ecx - # j2 = in2 - movl %ebp,172(%esp) - # in5 = *(uint32 *) (x + 20) - movl 20(%edx),%ebp - # j3 = in3 - movl %eax,176(%esp) - # in6 = *(uint32 *) (x + 24) - movl 24(%edx),%eax - # j4 = in4 - movl %ecx,180(%esp) - # in7 = *(uint32 *) (x + 28) - movl 28(%edx),%ecx - # j5 = in5 - movl %ebp,184(%esp) - # in8 = *(uint32 *) (x + 32) - movl 32(%edx),%ebp - # j6 = in6 - movl %eax,188(%esp) - # in9 = *(uint32 *) (x + 36) - movl 36(%edx),%eax - # j7 = in7 - movl %ecx,192(%esp) - # in10 = *(uint32 *) (x + 40) - movl 40(%edx),%ecx - # j8 = in8 - movl %ebp,196(%esp) - # in11 = *(uint32 *) (x + 44) - movl 44(%edx),%ebp - # j9 = in9 - movl %eax,200(%esp) - # in12 = *(uint32 *) (x + 48) - movl 48(%edx),%eax - # j10 = in10 - movl %ecx,204(%esp) - # in13 = *(uint32 *) (x + 52) - movl 52(%edx),%ecx - # j11 = in11 - movl %ebp,208(%esp) - # in14 = *(uint32 *) (x + 56) - movl 56(%edx),%ebp - # j12 = in12 - movl %eax,212(%esp) - # in15 = *(uint32 *) (x + 60) - movl 60(%edx),%eax - # j13 = in13 - movl %ecx,216(%esp) - # j14 = in14 - movl %ebp,220(%esp) - # j15 = in15 - movl %eax,224(%esp) - # x_backup = x - movl %edx,64(%esp) -._bytesatleast1: - # bytes - 64 - cmp $64,%ebx - # goto nocopy if unsigned>= - jae ._nocopy - # ctarget = out - movl %edi,228(%esp) - # out = &tmp - leal 0(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leal 0(%esp),%edi - # m = &tmp - leal 0(%esp),%esi -._nocopy: - # out_backup = out - movl %edi,72(%esp) - # m_backup = m - movl %esi,68(%esp) - # bytes_backup = bytes - movl %ebx,76(%esp) - # in0 = j0 - movl 164(%esp),%eax - # in1 = j1 - movl 168(%esp),%ecx - # in2 = j2 - movl 172(%esp),%edx - # in3 = j3 - movl 176(%esp),%ebx - # x0 = in0 - movl %eax,100(%esp) - # x1 = in1 - movl %ecx,104(%esp) - # x2 = in2 - movl %edx,108(%esp) - # x3 = in3 - movl %ebx,112(%esp) - # in4 = j4 - movl 180(%esp),%eax - # in5 = j5 - movl 184(%esp),%ecx - # in6 = j6 - movl 188(%esp),%edx - # in7 = j7 - movl 192(%esp),%ebx - # x4 = in4 - movl %eax,116(%esp) - # x5 = in5 - movl %ecx,120(%esp) - # x6 = in6 - movl %edx,124(%esp) - # x7 = in7 - movl %ebx,128(%esp) - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in10 = j10 - movl 204(%esp),%edx - # in11 = j11 - movl 208(%esp),%ebx - # x8 = in8 - movl %eax,132(%esp) - # x9 = in9 - movl %ecx,136(%esp) - # x10 = in10 - movl %edx,140(%esp) - # x11 = in11 - movl %ebx,144(%esp) - # in12 = j12 - movl 212(%esp),%eax - # in13 = j13 - movl 216(%esp),%ecx - # in14 = j14 - movl 220(%esp),%edx - # in15 = j15 - movl 224(%esp),%ebx - # x12 = in12 - movl %eax,148(%esp) - # x13 = in13 - movl %ecx,152(%esp) - # x14 = in14 - movl %edx,156(%esp) - # x15 = in15 - movl %ebx,160(%esp) - # i = 20 - mov $20,%ebp - # p = x0 - movl 100(%esp),%eax - # s = x5 - movl 120(%esp),%ecx - # t = x10 - movl 140(%esp),%edx - # w = x15 - movl 160(%esp),%ebx -._mainloop: - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # i -= 4 - sub $4,%ebp - # goto mainloop if unsigned > - ja ._mainloop - # x0 = p - movl %eax,100(%esp) - # x5 = s - movl %ecx,120(%esp) - # x10 = t - movl %edx,140(%esp) - # x15 = w - movl %ebx,160(%esp) - # out = out_backup - movl 72(%esp),%edi - # m = m_backup - movl 68(%esp),%esi - # in0 = x0 - movl 100(%esp),%eax - # in1 = x1 - movl 104(%esp),%ecx - # in0 += j0 - addl 164(%esp),%eax - # in1 += j1 - addl 168(%esp),%ecx - # in0 ^= *(uint32 *) (m + 0) - xorl 0(%esi),%eax - # in1 ^= *(uint32 *) (m + 4) - xorl 4(%esi),%ecx - # *(uint32 *) (out + 0) = in0 - movl %eax,0(%edi) - # *(uint32 *) (out + 4) = in1 - movl %ecx,4(%edi) - # in2 = x2 - movl 108(%esp),%eax - # in3 = x3 - movl 112(%esp),%ecx - # in2 += j2 - addl 172(%esp),%eax - # in3 += j3 - addl 176(%esp),%ecx - # in2 ^= *(uint32 *) (m + 8) - xorl 8(%esi),%eax - # in3 ^= *(uint32 *) (m + 12) - xorl 12(%esi),%ecx - # *(uint32 *) (out + 8) = in2 - movl %eax,8(%edi) - # *(uint32 *) (out + 12) = in3 - movl %ecx,12(%edi) - # in4 = x4 - movl 116(%esp),%eax - # in5 = x5 - movl 120(%esp),%ecx - # in4 += j4 - addl 180(%esp),%eax - # in5 += j5 - addl 184(%esp),%ecx - # in4 ^= *(uint32 *) (m + 16) - xorl 16(%esi),%eax - # in5 ^= *(uint32 *) (m + 20) - xorl 20(%esi),%ecx - # *(uint32 *) (out + 16) = in4 - movl %eax,16(%edi) - # *(uint32 *) (out + 20) = in5 - movl %ecx,20(%edi) - # in6 = x6 - movl 124(%esp),%eax - # in7 = x7 - movl 128(%esp),%ecx - # in6 += j6 - addl 188(%esp),%eax - # in7 += j7 - addl 192(%esp),%ecx - # in6 ^= *(uint32 *) (m + 24) - xorl 24(%esi),%eax - # in7 ^= *(uint32 *) (m + 28) - xorl 28(%esi),%ecx - # *(uint32 *) (out + 24) = in6 - movl %eax,24(%edi) - # *(uint32 *) (out + 28) = in7 - movl %ecx,28(%edi) - # in8 = x8 - movl 132(%esp),%eax - # in9 = x9 - movl 136(%esp),%ecx - # in8 += j8 - addl 196(%esp),%eax - # in9 += j9 - addl 200(%esp),%ecx - # in8 ^= *(uint32 *) (m + 32) - xorl 32(%esi),%eax - # in9 ^= *(uint32 *) (m + 36) - xorl 36(%esi),%ecx - # *(uint32 *) (out + 32) = in8 - movl %eax,32(%edi) - # *(uint32 *) (out + 36) = in9 - movl %ecx,36(%edi) - # in10 = x10 - movl 140(%esp),%eax - # in11 = x11 - movl 144(%esp),%ecx - # in10 += j10 - addl 204(%esp),%eax - # in11 += j11 - addl 208(%esp),%ecx - # in10 ^= *(uint32 *) (m + 40) - xorl 40(%esi),%eax - # in11 ^= *(uint32 *) (m + 44) - xorl 44(%esi),%ecx - # *(uint32 *) (out + 40) = in10 - movl %eax,40(%edi) - # *(uint32 *) (out + 44) = in11 - movl %ecx,44(%edi) - # in12 = x12 - movl 148(%esp),%eax - # in13 = x13 - movl 152(%esp),%ecx - # in12 += j12 - addl 212(%esp),%eax - # in13 += j13 - addl 216(%esp),%ecx - # in12 ^= *(uint32 *) (m + 48) - xorl 48(%esi),%eax - # in13 ^= *(uint32 *) (m + 52) - xorl 52(%esi),%ecx - # *(uint32 *) (out + 48) = in12 - movl %eax,48(%edi) - # *(uint32 *) (out + 52) = in13 - movl %ecx,52(%edi) - # in14 = x14 - movl 156(%esp),%eax - # in15 = x15 - movl 160(%esp),%ecx - # in14 += j14 - addl 220(%esp),%eax - # in15 += j15 - addl 224(%esp),%ecx - # in14 ^= *(uint32 *) (m + 56) - xorl 56(%esi),%eax - # in15 ^= *(uint32 *) (m + 60) - xorl 60(%esi),%ecx - # *(uint32 *) (out + 56) = in14 - movl %eax,56(%edi) - # *(uint32 *) (out + 60) = in15 - movl %ecx,60(%edi) - # bytes = bytes_backup - movl 76(%esp),%ebx - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in8 += 1 - add $1,%eax - # in9 += 0 + carry - adc $0,%ecx - # j8 = in8 - movl %eax,196(%esp) - # j9 = in9 - movl %ecx,200(%esp) - # bytes - 64 - cmp $64,%ebx - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # goto bytesatleast64 if unsigned>= - jae ._bytesatleast64 - # m = out - mov %edi,%esi - # out = ctarget - movl 228(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb -._bytesatleast64: - # x = x_backup - movl 64(%esp),%eax - # in8 = j8 - movl 196(%esp),%ecx - # in9 = j9 - movl 200(%esp),%edx - # *(uint32 *) (x + 32) = in8 - movl %ecx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %edx,36(%eax) -._done: - # eax = eax_stack - movl 80(%esp),%eax - # ebx = ebx_stack - movl 84(%esp),%ebx - # esi = esi_stack - movl 88(%esp),%esi - # edi = edi_stack - movl 92(%esp),%edi - # ebp = ebp_stack - movl 96(%esp),%ebp - # leave - add %eax,%esp - ret -._bytesatleast65: - # bytes -= 64 - sub $64,%ebx - # out += 64 - add $64,%edi - # m += 64 - add $64,%esi - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # k = arg2 - movl 8(%esp,%eax),%ecx - # kbits = arg3 - movl 12(%esp,%eax),%edx - # x = arg1 - movl 4(%esp,%eax),%eax - # in1 = *(uint32 *) (k + 0) - movl 0(%ecx),%ebx - # in2 = *(uint32 *) (k + 4) - movl 4(%ecx),%esi - # in3 = *(uint32 *) (k + 8) - movl 8(%ecx),%edi - # in4 = *(uint32 *) (k + 12) - movl 12(%ecx),%ebp - # *(uint32 *) (x + 4) = in1 - movl %ebx,4(%eax) - # *(uint32 *) (x + 8) = in2 - movl %esi,8(%eax) - # *(uint32 *) (x + 12) = in3 - movl %edi,12(%eax) - # *(uint32 *) (x + 16) = in4 - movl %ebp,16(%eax) - # kbits - 256 - cmp $256,%edx - # goto kbits128 if unsigned< - jb ._kbits128 -._kbits256: - # in11 = *(uint32 *) (k + 16) - movl 16(%ecx),%edx - # in12 = *(uint32 *) (k + 20) - movl 20(%ecx),%ebx - # in13 = *(uint32 *) (k + 24) - movl 24(%ecx),%esi - # in14 = *(uint32 *) (k + 28) - movl 28(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 857760878 - mov $857760878,%edx - # in10 = 2036477234 - mov $2036477234,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) - # goto keysetupdone - jmp ._keysetupdone -._kbits128: - # in11 = *(uint32 *) (k + 0) - movl 0(%ecx),%edx - # in12 = *(uint32 *) (k + 4) - movl 4(%ecx),%ebx - # in13 = *(uint32 *) (k + 8) - movl 8(%ecx),%esi - # in14 = *(uint32 *) (k + 12) - movl 12(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 824206446 - mov $824206446,%edx - # in10 = 2036477238 - mov $2036477238,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) -._keysetupdone: - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_keysetup) - -# enter salsa20_ivsetup -ENTRY(salsa20_ivsetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # iv = arg2 - movl 8(%esp,%eax),%ecx - # x = arg1 - movl 4(%esp,%eax),%eax - # in6 = *(uint32 *) (iv + 0) - movl 0(%ecx),%edx - # in7 = *(uint32 *) (iv + 4) - movl 4(%ecx),%ecx - # in8 = 0 - mov $0,%ebx - # in9 = 0 - mov $0,%esi - # *(uint32 *) (x + 24) = in6 - movl %edx,24(%eax) - # *(uint32 *) (x + 28) = in7 - movl %ecx,28(%eax) - # *(uint32 *) (x + 32) = in8 - movl %ebx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %esi,36(%eax) - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S deleted file mode 100644 index 10db30d58006..000000000000 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ /dev/null @@ -1,919 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # x = arg1 - mov %rdi,%r8 - # m = arg2 - mov %rsi,%rsi - # out = arg3 - mov %rdx,%rdi - # bytes = arg4 - mov %rcx,%rdx - # unsigned>? bytes - 0 - cmp $0,%rdx - # comment:fp stack unchanged by jump - # goto done if !unsigned> - jbe ._done - # comment:fp stack unchanged by fallthrough -# start: -._start: - # r11_stack = r11 - movq %r11,0(%rsp) - # r12_stack = r12 - movq %r12,8(%rsp) - # r13_stack = r13 - movq %r13,16(%rsp) - # r14_stack = r14 - movq %r14,24(%rsp) - # r15_stack = r15 - movq %r15,32(%rsp) - # rbx_stack = rbx - movq %rbx,40(%rsp) - # rbp_stack = rbp - movq %rbp,48(%rsp) - # in0 = *(uint64 *) (x + 0) - movq 0(%r8),%rcx - # in2 = *(uint64 *) (x + 8) - movq 8(%r8),%r9 - # in4 = *(uint64 *) (x + 16) - movq 16(%r8),%rax - # in6 = *(uint64 *) (x + 24) - movq 24(%r8),%r10 - # in8 = *(uint64 *) (x + 32) - movq 32(%r8),%r11 - # in10 = *(uint64 *) (x + 40) - movq 40(%r8),%r12 - # in12 = *(uint64 *) (x + 48) - movq 48(%r8),%r13 - # in14 = *(uint64 *) (x + 56) - movq 56(%r8),%r14 - # j0 = in0 - movq %rcx,56(%rsp) - # j2 = in2 - movq %r9,64(%rsp) - # j4 = in4 - movq %rax,72(%rsp) - # j6 = in6 - movq %r10,80(%rsp) - # j8 = in8 - movq %r11,88(%rsp) - # j10 = in10 - movq %r12,96(%rsp) - # j12 = in12 - movq %r13,104(%rsp) - # j14 = in14 - movq %r14,112(%rsp) - # x_backup = x - movq %r8,120(%rsp) -# bytesatleast1: -._bytesatleast1: - # unsigned>= 32 - shr $32,%rdi - # x3 = j2 - movq 64(%rsp),%rsi - # x2 = x3 - mov %rsi,%rcx - # (uint64) x3 >>= 32 - shr $32,%rsi - # x5 = j4 - movq 72(%rsp),%r8 - # x4 = x5 - mov %r8,%r9 - # (uint64) x5 >>= 32 - shr $32,%r8 - # x5_stack = x5 - movq %r8,160(%rsp) - # x7 = j6 - movq 80(%rsp),%r8 - # x6 = x7 - mov %r8,%rax - # (uint64) x7 >>= 32 - shr $32,%r8 - # x9 = j8 - movq 88(%rsp),%r10 - # x8 = x9 - mov %r10,%r11 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x11 = j10 - movq 96(%rsp),%r12 - # x10 = x11 - mov %r12,%r13 - # x10_stack = x10 - movq %r13,168(%rsp) - # (uint64) x11 >>= 32 - shr $32,%r12 - # x13 = j12 - movq 104(%rsp),%r13 - # x12 = x13 - mov %r13,%r14 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x15 = j14 - movq 112(%rsp),%r15 - # x14 = x15 - mov %r15,%rbx - # (uint64) x15 >>= 32 - shr $32,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = 20 - mov $20,%r15 -# mainloop: -._mainloop: - # i_backup = i - movq %r15,184(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = i_backup - movq 184(%rsp),%r15 - # unsigned>? i -= 4 - sub $4,%r15 - # comment:fp stack unchanged by jump - # goto mainloop if unsigned> - ja ._mainloop - # (uint32) x2 += j2 - addl 64(%rsp),%ecx - # x3 <<= 32 - shl $32,%rsi - # x3 += j2 - addq 64(%rsp),%rsi - # (uint64) x3 >>= 32 - shr $32,%rsi - # x3 <<= 32 - shl $32,%rsi - # x2 += x3 - add %rsi,%rcx - # (uint32) x6 += j6 - addl 80(%rsp),%eax - # x7 <<= 32 - shl $32,%r8 - # x7 += j6 - addq 80(%rsp),%r8 - # (uint64) x7 >>= 32 - shr $32,%r8 - # x7 <<= 32 - shl $32,%r8 - # x6 += x7 - add %r8,%rax - # (uint32) x8 += j8 - addl 88(%rsp),%r11d - # x9 <<= 32 - shl $32,%r10 - # x9 += j8 - addq 88(%rsp),%r10 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x9 <<= 32 - shl $32,%r10 - # x8 += x9 - add %r10,%r11 - # (uint32) x12 += j12 - addl 104(%rsp),%r14d - # x13 <<= 32 - shl $32,%r13 - # x13 += j12 - addq 104(%rsp),%r13 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x13 <<= 32 - shl $32,%r13 - # x12 += x13 - add %r13,%r14 - # (uint32) x0 += j0 - addl 56(%rsp),%edx - # x1 <<= 32 - shl $32,%rdi - # x1 += j0 - addq 56(%rsp),%rdi - # (uint64) x1 >>= 32 - shr $32,%rdi - # x1 <<= 32 - shl $32,%rdi - # x0 += x1 - add %rdi,%rdx - # x5 = x5_stack - movq 160(%rsp),%rdi - # (uint32) x4 += j4 - addl 72(%rsp),%r9d - # x5 <<= 32 - shl $32,%rdi - # x5 += j4 - addq 72(%rsp),%rdi - # (uint64) x5 >>= 32 - shr $32,%rdi - # x5 <<= 32 - shl $32,%rdi - # x4 += x5 - add %rdi,%r9 - # x10 = x10_stack - movq 168(%rsp),%r8 - # (uint32) x10 += j10 - addl 96(%rsp),%r8d - # x11 <<= 32 - shl $32,%r12 - # x11 += j10 - addq 96(%rsp),%r12 - # (uint64) x11 >>= 32 - shr $32,%r12 - # x11 <<= 32 - shl $32,%r12 - # x10 += x11 - add %r12,%r8 - # x15 = x15_stack - movq 176(%rsp),%rdi - # (uint32) x14 += j14 - addl 112(%rsp),%ebx - # x15 <<= 32 - shl $32,%rdi - # x15 += j14 - addq 112(%rsp),%rdi - # (uint64) x15 >>= 32 - shr $32,%rdi - # x15 <<= 32 - shl $32,%rdi - # x14 += x15 - add %rdi,%rbx - # out = out_backup - movq 136(%rsp),%rdi - # m = m_backup - movq 144(%rsp),%rsi - # x0 ^= *(uint64 *) (m + 0) - xorq 0(%rsi),%rdx - # *(uint64 *) (out + 0) = x0 - movq %rdx,0(%rdi) - # x2 ^= *(uint64 *) (m + 8) - xorq 8(%rsi),%rcx - # *(uint64 *) (out + 8) = x2 - movq %rcx,8(%rdi) - # x4 ^= *(uint64 *) (m + 16) - xorq 16(%rsi),%r9 - # *(uint64 *) (out + 16) = x4 - movq %r9,16(%rdi) - # x6 ^= *(uint64 *) (m + 24) - xorq 24(%rsi),%rax - # *(uint64 *) (out + 24) = x6 - movq %rax,24(%rdi) - # x8 ^= *(uint64 *) (m + 32) - xorq 32(%rsi),%r11 - # *(uint64 *) (out + 32) = x8 - movq %r11,32(%rdi) - # x10 ^= *(uint64 *) (m + 40) - xorq 40(%rsi),%r8 - # *(uint64 *) (out + 40) = x10 - movq %r8,40(%rdi) - # x12 ^= *(uint64 *) (m + 48) - xorq 48(%rsi),%r14 - # *(uint64 *) (out + 48) = x12 - movq %r14,48(%rdi) - # x14 ^= *(uint64 *) (m + 56) - xorq 56(%rsi),%rbx - # *(uint64 *) (out + 56) = x14 - movq %rbx,56(%rdi) - # bytes = bytes_backup - movq 152(%rsp),%rdx - # in8 = j8 - movq 88(%rsp),%rcx - # in8 += 1 - add $1,%rcx - # j8 = in8 - movq %rcx,88(%rsp) - # unsigned>? unsigned - ja ._bytesatleast65 - # comment:fp stack unchanged by jump - # goto bytesatleast64 if !unsigned< - jae ._bytesatleast64 - # m = out - mov %rdi,%rsi - # out = ctarget - movq 128(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # comment:fp stack unchanged by fallthrough -# bytesatleast64: -._bytesatleast64: - # x = x_backup - movq 120(%rsp),%rdi - # in8 = j8 - movq 88(%rsp),%rsi - # *(uint64 *) (x + 32) = in8 - movq %rsi,32(%rdi) - # r11 = r11_stack - movq 0(%rsp),%r11 - # r12 = r12_stack - movq 8(%rsp),%r12 - # r13 = r13_stack - movq 16(%rsp),%r13 - # r14 = r14_stack - movq 24(%rsp),%r14 - # r15 = r15_stack - movq 32(%rsp),%r15 - # rbx = rbx_stack - movq 40(%rsp),%rbx - # rbp = rbp_stack - movq 48(%rsp),%rbp - # comment:fp stack unchanged by fallthrough -# done: -._done: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -# bytesatleast65: -._bytesatleast65: - # bytes -= 64 - sub $64,%rdx - # out += 64 - add $64,%rdi - # m += 64 - add $64,%rsi - # comment:fp stack unchanged by jump - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # k = arg2 - mov %rsi,%rsi - # kbits = arg3 - mov %rdx,%rdx - # x = arg1 - mov %rdi,%rdi - # in0 = *(uint64 *) (k + 0) - movq 0(%rsi),%r8 - # in2 = *(uint64 *) (k + 8) - movq 8(%rsi),%r9 - # *(uint64 *) (x + 4) = in0 - movq %r8,4(%rdi) - # *(uint64 *) (x + 12) = in2 - movq %r9,12(%rdi) - # unsigned - * - * The assembly codes are public domain assembly codes written by Daniel. J. - * Bernstein . The codes are modified to include indentation - * and to remove extraneous comments and functions that are not needed. - * - i586 version, renamed as salsa20-i586-asm_32.S - * available from - * - x86-64 version, renamed as salsa20-x86_64-asm_64.S - * available from - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include - -#define SALSA20_IV_SIZE 8U -#define SALSA20_MIN_KEY_SIZE 16U -#define SALSA20_MAX_KEY_SIZE 32U - -struct salsa20_ctx -{ - u32 input[16]; -}; - -asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, - u32 keysize, u32 ivsize); -asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); -asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, - const u8 *src, u8 *dst, u32 bytes); - -static int setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keysize) -{ - struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); - salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); - return 0; -} - -static int encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 64); - - salsa20_ivsetup(ctx, walk.iv); - - while (walk.nbytes >= 64) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes - (walk.nbytes % 64)); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); - } - - if (walk.nbytes) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg alg = { - .cra_name = "salsa20", - .cra_driver_name = "salsa20-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_type = &crypto_blkcipher_type, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct salsa20_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .setkey = setkey, - .encrypt = encrypt, - .decrypt = encrypt, - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize = SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS_CRYPTO("salsa20"); -MODULE_ALIAS_CRYPTO("salsa20-asm"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 42212b60a0ee..5579eb88d460 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1324,32 +1324,6 @@ config CRYPTO_SALSA20 The Salsa20 stream cipher algorithm is designed by Daniel J. Bernstein . See -config CRYPTO_SALSA20_586 - tristate "Salsa20 stream cipher algorithm (i586)" - depends on (X86 || UML_X86) && !64BIT - select CRYPTO_BLKCIPHER - help - Salsa20 stream cipher algorithm. - - Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT - Stream Cipher Project. See - - The Salsa20 stream cipher algorithm is designed by Daniel J. - Bernstein . See - -config CRYPTO_SALSA20_X86_64 - tristate "Salsa20 stream cipher algorithm (x86_64)" - depends on (X86 || UML_X86) && 64BIT - select CRYPTO_BLKCIPHER - help - Salsa20 stream cipher algorithm. - - Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT - Stream Cipher Project. See - - The Salsa20 stream cipher algorithm is designed by Daniel J. - Bernstein . See - config CRYPTO_CHACHA20 tristate "ChaCha20 cipher algorithm" select CRYPTO_BLKCIPHER -- GitLab From 0032322689bb5fbf480e7de18488d0c0659cb129 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 18 May 2018 18:27:39 +0200 Subject: [PATCH 0163/1291] uprobes/x86: Remove incorrect WARN_ON() in uprobe_init_insn() commit 90718e32e1dcc2479acfa208ccfc6442850b594c upstream. insn_get_length() has the side-effect of processing the entire instruction but only if it was decoded successfully, otherwise insn_complete() can fail and in this case we need to just return an error without warning. Reported-by: syzbot+30d675e3ca03c1c351e7@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Link: https://lkml.kernel.org/lkml/20180518162739.GA5559@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index e1ea13ae53b9..b9a8f34b5e5a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -290,7 +290,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64); /* has the side-effect of processing the entire instruction */ insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) + if (!insn_complete(insn)) return -ENOEXEC; if (is_prefix_bad(insn)) -- GitLab From e5ee20c65b3404fa0aa43a6622664cb005bb2a7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 09:13:39 -0700 Subject: [PATCH 0164/1291] netfilter: nf_queue: augment nfqa_cfg_policy commit ba062ebb2cd561d404e0fba8ee4b3f5ebce7cbfc upstream. Three attributes are currently not verified, thus can trigger KMSAN warnings such as : BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 CPU: 1 PID: 4521 Comm: syz-executor120 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 __msan_warning_32+0x70/0xc0 mm/kmsan/kmsan_instr.c:620 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 nfnetlink_rcv_msg+0xb2e/0xc80 net/netfilter/nfnetlink.c:212 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 nfnetlink_rcv+0x2fe/0x680 net/netfilter/nfnetlink.c:513 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x43fd59 RSP: 002b:00007ffde0e30d28 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fd59 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401680 R13: 0000000000401710 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2753 [inline] __kmalloc_node_track_caller+0xb35/0x11b0 mm/slub.c:4395 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:988 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: fdb694a01f1f ("netfilter: Add fail-open support") Fixes: 829e17a1a602 ("[NETFILTER]: nfnetlink_queue: allow changing queue length through netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c9796629858f..02bbc2f9f1f1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1228,6 +1228,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, + [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 }, + [NFQA_CFG_MASK] = { .type = NLA_U32 }, + [NFQA_CFG_FLAGS] = { .type = NLA_U32 }, }; static const struct nf_queue_handler nfqh = { -- GitLab From 348b32aa3a37899724b961a7108feeb461b7ff2f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Jun 2018 21:34:43 +0200 Subject: [PATCH 0165/1291] netfilter: x_tables: initialise match/target check parameter struct commit c568503ef02030f169c9e19204def610a3510918 upstream. syzbot reports following splat: BUG: KMSAN: uninit-value in ebt_stp_mt_check+0x24b/0x450 net/bridge/netfilter/ebt_stp.c:162 ebt_stp_mt_check+0x24b/0x450 net/bridge/netfilter/ebt_stp.c:162 xt_check_match+0x1438/0x1650 net/netfilter/x_tables.c:506 ebt_check_match net/bridge/netfilter/ebtables.c:372 [inline] ebt_check_entry net/bridge/netfilter/ebtables.c:702 [inline] The uninitialised access is xt_mtchk_param->nft_compat ... which should be set to 0. Fix it by zeroing the struct beforehand, same for tgchk. ip(6)tables targetinfo uses c99-style initialiser, so no change needed there. Reported-by: syzbot+da4494182233c23a5fcf@syzkaller.appspotmail.com Fixes: 55917a21d0cc0 ("netfilter: x_tables: add context to know if extension runs from nft_compat") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + 3 files changed, 4 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index e27fb6e97d18..25738b20676d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -696,6 +696,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, } i = 0; + memset(&mtpar, 0, sizeof(mtpar)); + memset(&tgpar, 0, sizeof(tgpar)); mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1a925f2394ad..114d4bef1bec 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -541,6 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, return -ENOMEM; j = 0; + memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c5fe42e6b7f7..2e51e0156903 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -561,6 +561,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, return -ENOMEM; j = 0; + memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; -- GitLab From 6f9f5797fc7de2e9f801b2f7af0726e1349d33a4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 7 May 2018 11:37:58 -0400 Subject: [PATCH 0166/1291] loop: add recursion validation to LOOP_CHANGE_FD commit d2ac838e4cd7e5e9891ecc094d626734b0245c99 upstream. Refactor the validation code used in LOOP_SET_FD so it is also used in LOOP_CHANGE_FD. Otherwise it is possible to construct a set of loop devices that all refer to each other. This can lead to a infinite loop in starting with "while (is_loop_device(f)) .." in loop_set_fd(). Fix this by refactoring out the validation code and using it for LOOP_CHANGE_FD as well as LOOP_SET_FD. Reported-by: syzbot+4349872271ece473a7c91190b68b4bac7c5dbc87@syzkaller.appspotmail.com Reported-by: syzbot+40bd32c4d9a3cc12a339@syzkaller.appspotmail.com Reported-by: syzbot+769c54e66f994b041be7@syzkaller.appspotmail.com Reported-by: syzbot+0a89a9ce473936c57065@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 68 +++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1a87f87c88d0..ac99d2b6e3f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -617,6 +617,36 @@ static void loop_reread_partitions(struct loop_device *lo, __func__, lo->lo_number, lo->lo_file_name, rc); } +static inline int is_loop_device(struct file *file) +{ + struct inode *i = file->f_mapping->host; + + return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; +} + +static int loop_validate_file(struct file *file, struct block_device *bdev) +{ + struct inode *inode = file->f_mapping->host; + struct file *f = file; + + /* Avoid recursion */ + while (is_loop_device(f)) { + struct loop_device *l; + + if (f->f_mapping->host->i_bdev == bdev) + return -EBADF; + + l = f->f_mapping->host->i_bdev->bd_disk->private_data; + if (l->lo_state == Lo_unbound) { + return -EINVAL; + } + f = l->lo_backing_file; + } + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) + return -EINVAL; + return 0; +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -646,14 +676,15 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!file) goto out; + error = loop_validate_file(file, bdev); + if (error) + goto out_putf; + inode = file->f_mapping->host; old_file = lo->lo_backing_file; error = -EINVAL; - if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) - goto out_putf; - /* size of the new backing store needs to be the same */ if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) goto out_putf; @@ -679,13 +710,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return error; } -static inline int is_loop_device(struct file *file) -{ - struct inode *i = file->f_mapping->host; - - return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; -} - /* loop sysfs attributes */ static ssize_t loop_attr_show(struct device *dev, char *page, @@ -850,7 +874,7 @@ static int loop_prepare_queue(struct loop_device *lo) static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { - struct file *file, *f; + struct file *file; struct inode *inode; struct address_space *mapping; int lo_flags = 0; @@ -869,29 +893,13 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (lo->lo_state != Lo_unbound) goto out_putf; - /* Avoid recursion */ - f = file; - while (is_loop_device(f)) { - struct loop_device *l; - - if (f->f_mapping->host->i_bdev == bdev) - goto out_putf; - - l = f->f_mapping->host->i_bdev->bd_disk->private_data; - if (l->lo_state == Lo_unbound) { - error = -EINVAL; - goto out_putf; - } - f = l->lo_backing_file; - } + error = loop_validate_file(file, bdev); + if (error) + goto out_putf; mapping = file->f_mapping; inode = mapping->host; - error = -EINVAL; - if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) - goto out_putf; - if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || !file->f_op->write_iter) lo_flags |= LO_FLAGS_READ_ONLY; -- GitLab From 140eae92cf7023422aec3a1647f78c1e0bdd6bf8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 May 2018 09:59:36 +0900 Subject: [PATCH 0167/1291] PM / hibernate: Fix oops at snapshot_write() commit fc14eebfc20854a38fd9f1d93a42b1783dad4d17 upstream. syzbot is reporting NULL pointer dereference at snapshot_write() [1]. This is because data->handle is zero-cleared by ioctl(SNAPSHOT_FREE). Fix this by checking data_of(data->handle) != NULL before using it. [1] https://syzkaller.appspot.com/bug?id=828a3c71bd344a6de8b6a31233d51a72099f27fd Signed-off-by: Tetsuo Handa Reported-by: syzbot Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/user.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/power/user.c b/kernel/power/user.c index 22df9f7ff672..69017a569f30 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -186,6 +186,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, res = PAGE_SIZE - pg_offp; } + if (!data_of(data->handle)) { + res = -EINVAL; + goto unlock; + } + res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp, buf, count); if (res > 0) -- GitLab From e8484443c9db30c7c84a867d41ffebe3a9794ced Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 23 May 2018 08:22:11 +0300 Subject: [PATCH 0168/1291] RDMA/ucm: Mark UCM interface as BROKEN commit 7a8690ed6f5346f6738971892205e91d39b6b901 upstream. In commit 357d23c811a7 ("Remove the obsolete libibcm library") in rdma-core [1], we removed obsolete library which used the /dev/infiniband/ucmX interface. Following multiple syzkaller reports about non-sanitized user input in the UCMA module, the short audit reveals the same issues in UCM module too. It is better to disable this interface in the kernel, before syzkaller team invests time and energy to harden this unused interface. [1] https://github.com/linux-rdma/rdma-core/pull/279 Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/Kconfig | 12 ++++++++++++ drivers/infiniband/core/Makefile | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 7507cc641de3..27b3c39e586a 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -34,6 +34,18 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from . +config INFINIBAND_USER_ACCESS_UCM + bool "Userspace CM (UCM, DEPRECATED)" + depends on BROKEN + depends on INFINIBAND_USER_ACCESS + help + The UCM module has known security flaws, which no one is + interested to fix. The user-space part of this code was + dropped from the upstream a long time ago. + + This option is DEPRECATED and planned to be removed. + + config INFINIBAND_EXP_USER_ACCESS bool "Allow experimental support for Infiniband ABI" depends on INFINIBAND_USER_ACCESS diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 9c0a2b5c834e..991c2522fb41 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -5,8 +5,8 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o -obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ - $(user_access-y) +obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) +obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ -- GitLab From d2c18ad18cc7041b0591847d4c4ea4802bbd122e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 4 May 2018 10:58:09 -0600 Subject: [PATCH 0169/1291] loop: remember whether sysfs_create_group() was done commit d3349b6b3c373ac1fbfb040b810fcee5e2adc7e0 upstream. syzbot is hitting WARN() triggered by memory allocation fault injection [1] because loop module is calling sysfs_remove_group() when sysfs_create_group() failed. Fix this by remembering whether sysfs_create_group() succeeded. [1] https://syzkaller.appspot.com/bug?id=3f86c0edf75c86d2633aeb9dd69eccc70bc7e90b Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Greg Kroah-Hartman Renamed sysfs_ready -> sysfs_inited. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 11 ++++++----- drivers/block/loop.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ac99d2b6e3f5..6d61633a7f89 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -806,16 +806,17 @@ static struct attribute_group loop_attribute_group = { .attrs= loop_attrs, }; -static int loop_sysfs_init(struct loop_device *lo) +static void loop_sysfs_init(struct loop_device *lo) { - return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, - &loop_attribute_group); + lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); } static void loop_sysfs_exit(struct loop_device *lo) { - sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, - &loop_attribute_group); + if (lo->sysfs_inited) + sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); } static void loop_config_discard(struct loop_device *lo) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 1f3956702993..dfc54ceba410 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -58,6 +58,7 @@ struct loop_device { struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; + bool sysfs_inited; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; -- GitLab From eab3a34122153f37b4ab13bb76a91eb633eed39b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 Apr 2018 23:02:31 -0600 Subject: [PATCH 0170/1291] f2fs: give message and set need_fsck given broken node id commit a4f843bd004d775cbb360cd375969b8a479568a9 upstream. syzbot hit the following crash on upstream commit 83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +0000) Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=d154ec99402c6f628887 C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5414336294027264 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5471683234234368 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5436660795834368 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+d154ec99402c6f628887@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0) F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (loop0): invalid crc value ------------[ cut here ]------------ kernel BUG at fs/f2fs/node.c:1185! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4549 Comm: syzkaller704305 Not tainted 4.17.0-rc1+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__get_node_page+0xb68/0x16e0 fs/f2fs/node.c:1185 RSP: 0018:ffff8801d960e820 EFLAGS: 00010293 RAX: ffff8801d88205c0 RBX: 0000000000000003 RCX: ffffffff82f6cc06 RDX: 0000000000000000 RSI: ffffffff82f6d5e8 RDI: 0000000000000004 RBP: ffff8801d960ec30 R08: ffff8801d88205c0 R09: ffffed003b5e46c2 R10: 0000000000000003 R11: 0000000000000003 R12: ffff8801a86e00c0 R13: 0000000000000001 R14: ffff8801a86e0530 R15: ffff8801d9745240 FS: 000000000072c880(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3d403209b8 CR3: 00000001d8f3f000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: get_node_page fs/f2fs/node.c:1237 [inline] truncate_xattr_node+0x152/0x2e0 fs/f2fs/node.c:1014 remove_inode_page+0x200/0xaf0 fs/f2fs/node.c:1039 f2fs_evict_inode+0xe86/0x1710 fs/f2fs/inode.c:547 evict+0x4a6/0x960 fs/inode.c:557 iput_final fs/inode.c:1519 [inline] iput+0x62d/0xa80 fs/inode.c:1545 f2fs_fill_super+0x5f4e/0x7bf0 fs/f2fs/super.c:2849 mount_bdev+0x30c/0x3e0 fs/super.c:1164 f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020 mount_fs+0xae/0x328 fs/super.c:1267 vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:1027 [inline] do_new_mount fs/namespace.c:2518 [inline] do_mount+0x564/0x3070 fs/namespace.c:2848 ksys_mount+0x12d/0x140 fs/namespace.c:3064 __do_sys_mount fs/namespace.c:3078 [inline] __se_sys_mount fs/namespace.c:3075 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3075 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x443dea RSP: 002b:00007ffcc7882368 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443dea RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffcc7882370 RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004 R13: 0000000000402ce0 R14: 0000000000000000 R15: 0000000000000000 RIP: __get_node_page+0xb68/0x16e0 fs/f2fs/node.c:1185 RSP: ffff8801d960e820 ---[ end trace 4edbeb71f002bb76 ]--- Reported-and-tested-by: syzbot+d154ec99402c6f628887@syzkaller.appspotmail.com Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 13 +------------ fs/f2fs/inode.c | 13 ++++++------- fs/f2fs/node.c | 21 +++++++++++++++++++-- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4b4a72f392be..3b34004a71c1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1470,18 +1470,6 @@ static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) is_set_ckpt_flags(sbi, CP_FASTBOOT_FLAG)); } -/* - * Check whether the given nid is within node id range. - */ -static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) -{ - if (unlikely(nid < F2FS_ROOT_INO(sbi))) - return -EINVAL; - if (unlikely(nid >= NM_I(sbi)->max_nid)) - return -EINVAL; - return 0; -} - /* * Check whether the inode has blocks or not */ @@ -2470,6 +2458,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, struct dnode_of_data; struct node_info; +int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool available_free_memory(struct f2fs_sb_info *sbi, int type); int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 50c88e37ed66..259b0aa283f0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -188,12 +188,8 @@ static int do_read_inode(struct inode *inode) projid_t i_projid; /* Check if ino is within scope */ - if (check_nid_range(sbi, inode->i_ino)) { - f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", - (unsigned long) inode->i_ino); - WARN_ON(1); + if (check_nid_range(sbi, inode->i_ino)) return -EINVAL; - } node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) @@ -538,8 +534,11 @@ void f2fs_evict_inode(struct inode *inode) alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); } else { - f2fs_bug_on(sbi, err && - !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); + /* + * If xattr nid is corrupted, we can reach out error condition, + * err & !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)). + * In that case, check_nid_range() is enough to give a clue. + */ } out_clear: fscrypt_put_encryption_info(inode, NULL); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fca87835a1da..f623da26159f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -29,6 +29,21 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; +/* + * Check whether the given nid is within node id range. + */ +int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: out-of-range nid=%x, run fsck to fix.", + __func__, nid); + return -EINVAL; + } + return 0; +} + bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1122,7 +1137,8 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (!nid) return; - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + if (check_nid_range(sbi, nid)) + return; rcu_read_lock(); apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); @@ -1146,7 +1162,8 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, if (!nid) return ERR_PTR(-ENOENT); - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + if (check_nid_range(sbi, nid)) + return ERR_PTR(-EINVAL); repeat: page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); if (!page) -- GitLab From cff26c95b24c7b7147db17b623dee946bd25ec95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 17 Jul 2018 11:39:34 +0200 Subject: [PATCH 0171/1291] Linux 4.14.56 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0700feaaa6cf..acbb0e3d29c9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 55 +SUBLEVEL = 56 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 779145a6f6ec8976846aeefd60171627e3d40328 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:22 -0700 Subject: [PATCH 0172/1291] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream. Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: sedat.dilek@gmail.com Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f43113b8890b..c11032b06d68 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -65,6 +65,18 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif +/* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. @@ -72,19 +84,22 @@ * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) -- GitLab From 92e50158fc0ace8e7a0c53845588cdc246ce5ca1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jun 2018 09:23:23 -0700 Subject: [PATCH 0173/1291] x86/asm: Add _ASM_ARG* constants for argument registers to commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream. i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 386a6900e206..3bf87f92b932 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". -- GitLab From edefb935700c130ff52a0a0da7edab7a34d7572c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:24 -0700 Subject: [PATCH 0174/1291] x86/paravirt: Make native_save_fl() extern inline commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream. native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/irqflags.S | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/irqflags.S diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 89f08955fff7..c4fc17220df9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 295abaa58add..4137f7ba0f88 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,6 +58,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S new file mode 100644 index 000000000000..ddeeaac8adda --- /dev/null +++ b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) -- GitLab From 61a9f6b7fe0ca9706b49a23cecf5f9a9c802b6ce Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 9 May 2018 16:01:46 +0100 Subject: [PATCH 0175/1291] Btrfs: fix duplicate extents after fsync of file with prealloc extents commit 31d11b83b96faaee4bb514d375a09489117c3e8d upstream. In commit 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay"), on fsync, we started to always log all prealloc extents beyond an inode's i_size in order to avoid losing them after a power failure. However under some cases this can lead to the log replay code to create duplicate extent items, with different lengths, in the extent tree. That happens because, as of that commit, we can now log extent items based on extent maps that are not on the "modified" list of extent maps of the inode's extent map tree. Logging extent items based on extent maps is used during the fast fsync path to save time and for this to work reliably it requires that the extent maps are not merged with other adjacent extent maps - having the extent maps in the list of modified extents gives such guarantee. Consider the following example, captured during a long run of fsstress, which illustrates this problem. We have inode 271, in the filesystem tree (root 5), for which all of the following operations and discussion apply to. A buffered write starts at offset 312391 with a length of 933471 bytes (end offset at 1245862). At this point we have, for this inode, the following extent maps with the their field values: em A, start 0, orig_start 0, len 40960, block_start 18446744073709551613, block_len 0, orig_block_len 0 em B, start 40960, orig_start 40960, len 376832, block_start 1106399232, block_len 376832, orig_block_len 376832 em C, start 417792, orig_start 417792, len 782336, block_start 18446744073709551613, block_len 0, orig_block_len 0 em D, start 1200128, orig_start 1200128, len 835584, block_start 1106776064, block_len 835584, orig_block_len 835584 em E, start 2035712, orig_start 2035712, len 245760, block_start 1107611648, block_len 245760, orig_block_len 245760 Extent map A corresponds to a hole and extent maps D and E correspond to preallocated extents. Extent map D ends where extent map E begins (1106776064 + 835584 = 1107611648), but these extent maps were not merged because they are in the inode's list of modified extent maps. An fsync against this inode is made, which triggers the fast path (BTRFS_INODE_NEEDS_FULL_SYNC is not set). This fsync triggers writeback of the data previously written using buffered IO, and when the respective ordered extent finishes, btrfs_drop_extents() is called against the (aligned) range 311296..1249279. This causes a split of extent map D at btrfs_drop_extent_cache(), replacing extent map D with a new extent map D', also added to the list of modified extents, with the following values: em D', start 1249280, orig_start of 1200128, block_start 1106825216 (= 1106776064 + 1249280 - 1200128), orig_block_len 835584, block_len 786432 (835584 - (1249280 - 1200128)) Then, during the fast fsync, btrfs_log_changed_extents() is called and extent maps D' and E are removed from the list of modified extents. The flag EXTENT_FLAG_LOGGING is also set on them. After the extents are logged clear_em_logging() is called on each of them, and that makes extent map E to be merged with extent map D' (try_merge_map()), resulting in D' being deleted and E adjusted to: em E, start 1249280, orig_start 1200128, len 1032192, block_start 1106825216, block_len 1032192, orig_block_len 245760 A direct IO write at offset 1847296 and length of 360448 bytes (end offset at 2207744) starts, and at that moment the following extent maps exist for our inode: em A, start 0, orig_start 0, len 40960, block_start 18446744073709551613, block_len 0, orig_block_len 0 em B, start 40960, orig_start 40960, len 270336, block_start 1106399232, block_len 270336, orig_block_len 376832 em C, start 311296, orig_start 311296, len 937984, block_start 1112842240, block_len 937984, orig_block_len 937984 em E (prealloc), start 1249280, orig_start 1200128, len 1032192, block_start 1106825216, block_len 1032192, orig_block_len 245760 The dio write results in drop_extent_cache() being called twice. The first time for a range that starts at offset 1847296 and ends at offset 2035711 (length of 188416), which results in a double split of extent map E, replacing it with two new extent maps: em F, start 1249280, orig_start 1200128, block_start 1106825216, block_len 598016, orig_block_len 598016 em G, start 2035712, orig_start 1200128, block_start 1107611648, block_len 245760, orig_block_len 1032192 It also creates a new extent map that represents a part of the requested IO (through create_io_em()): em H, start 1847296, len 188416, block_start 1107423232, block_len 188416 The second call to drop_extent_cache() has a range with a start offset of 2035712 and end offset of 2207743 (length of 172032). This leads to replacing extent map G with a new extent map I with the following values: em I, start 2207744, orig_start 1200128, block_start 1107783680, block_len 73728, orig_block_len 1032192 It also creates a new extent map that represents the second part of the requested IO (through create_io_em()): em J, start 2035712, len 172032, block_start 1107611648, block_len 172032 The dio write set the inode's i_size to 2207744 bytes. After the dio write the inode has the following extent maps: em A, start 0, orig_start 0, len 40960, block_start 18446744073709551613, block_len 0, orig_block_len 0 em B, start 40960, orig_start 40960, len 270336, block_start 1106399232, block_len 270336, orig_block_len 376832 em C, start 311296, orig_start 311296, len 937984, block_start 1112842240, block_len 937984, orig_block_len 937984 em F, start 1249280, orig_start 1200128, len 598016, block_start 1106825216, block_len 598016, orig_block_len 598016 em H, start 1847296, orig_start 1200128, len 188416, block_start 1107423232, block_len 188416, orig_block_len 835584 em J, start 2035712, orig_start 2035712, len 172032, block_start 1107611648, block_len 172032, orig_block_len 245760 em I, start 2207744, orig_start 1200128, len 73728, block_start 1107783680, block_len 73728, orig_block_len 1032192 Now do some change to the file, like adding a xattr for example and then fsync it again. This triggers a fast fsync path, and as of commit 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay"), we use the extent map I to log a file extent item because it's a prealloc extent and it starts at an offset matching the inode's i_size. However when we log it, we create a file extent item with a value for the disk byte location that is wrong, as can be seen from the following output of "btrfs inspect-internal dump-tree": item 1 key (271 EXTENT_DATA 2207744) itemoff 3782 itemsize 53 generation 22 type 2 (prealloc) prealloc data disk byte 1106776064 nr 1032192 prealloc data offset 1007616 nr 73728 Here the disk byte value corresponds to calculation based on some fields from the extent map I: 1106776064 = block_start (1107783680) - 1007616 (extent_offset) extent_offset = 2207744 (start) - 1200128 (orig_start) = 1007616 The disk byte value of 1106776064 clashes with disk byte values of the file extent items at offsets 1249280 and 1847296 in the fs tree: item 6 key (271 EXTENT_DATA 1249280) itemoff 3568 itemsize 53 generation 20 type 2 (prealloc) prealloc data disk byte 1106776064 nr 835584 prealloc data offset 49152 nr 598016 item 7 key (271 EXTENT_DATA 1847296) itemoff 3515 itemsize 53 generation 20 type 1 (regular) extent data disk byte 1106776064 nr 835584 extent data offset 647168 nr 188416 ram 835584 extent compression 0 (none) item 8 key (271 EXTENT_DATA 2035712) itemoff 3462 itemsize 53 generation 20 type 1 (regular) extent data disk byte 1107611648 nr 245760 extent data offset 0 nr 172032 ram 245760 extent compression 0 (none) item 9 key (271 EXTENT_DATA 2207744) itemoff 3409 itemsize 53 generation 20 type 2 (prealloc) prealloc data disk byte 1107611648 nr 245760 prealloc data offset 172032 nr 73728 Instead of the disk byte value of 1106776064, the value of 1107611648 should have been logged. Also the data offset value should have been 172032 and not 1007616. After a log replay we end up getting two extent items in the extent tree with different lengths, one of 835584, which is correct and existed before the log replay, and another one of 1032192 which is wrong and is based on the logged file extent item: item 12 key (1106776064 EXTENT_ITEM 835584) itemoff 3406 itemsize 53 refs 2 gen 15 flags DATA extent data backref root 5 objectid 271 offset 1200128 count 2 item 13 key (1106776064 EXTENT_ITEM 1032192) itemoff 3353 itemsize 53 refs 1 gen 22 flags DATA extent data backref root 5 objectid 271 offset 1200128 count 1 Obviously this leads to many problems and a filesystem check reports many errors: (...) checking extents Extent back ref already exists for 1106776064 parent 0 root 5 owner 271 offset 1200128 num_refs 1 extent item 1106776064 has multiple extent items ref mismatch on [1106776064 835584] extent item 2, found 3 Incorrect local backref count on 1106776064 root 5 owner 271 offset 1200128 found 2 wanted 1 back 0x55b1d0ad7680 Backref 1106776064 root 5 owner 271 offset 1200128 num_refs 0 not found in extent tree Incorrect local backref count on 1106776064 root 5 owner 271 offset 1200128 found 1 wanted 0 back 0x55b1d0ad4e70 Backref bytes do not match extent backref, bytenr=1106776064, ref bytes=835584, backref bytes=1032192 backpointer mismatch on [1106776064 835584] checking free space cache block group 1103101952 has wrong amount of free space failed to load free space cache for block group 1103101952 checking fs roots (...) So fix this by logging the prealloc extents beyond the inode's i_size based on searches in the subvolume tree instead of the extent maps. Fixes: 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 137 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 112 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fc4c14a72366..bf4e22df7c97 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4214,6 +4214,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans, return ret; } +/* + * Log all prealloc extents beyond the inode's i_size to make sure we do not + * lose them after doing a fast fsync and replaying the log. We scan the + * subvolume's root instead of iterating the inode's extent map tree because + * otherwise we can log incorrect extent items based on extent map conversion. + * That can happen due to the fact that extent maps are merged when they + * are not in the extent map tree's list of modified extents. + */ +static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key; + const u64 i_size = i_size_read(&inode->vfs_inode); + const u64 ino = btrfs_ino(inode); + struct btrfs_path *dst_path = NULL; + u64 last_extent = (u64)-1; + int ins_nr = 0; + int start_slot; + int ret; + + if (!(inode->flags & BTRFS_INODE_PREALLOC)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = i_size; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(leaf)) { + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + if (ret < 0) + goto out; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY || + key.offset < i_size) { + path->slots[0]++; + continue; + } + if (last_extent == (u64)-1) { + last_extent = key.offset; + /* + * Avoid logging extent items logged in past fsync calls + * and leading to duplicate keys in the log tree. + */ + do { + ret = btrfs_truncate_inode_items(trans, + root->log_root, + &inode->vfs_inode, + i_size, + BTRFS_EXTENT_DATA_KEY); + } while (ret == -EAGAIN); + if (ret) + goto out; + } + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + if (!dst_path) { + dst_path = btrfs_alloc_path(); + if (!dst_path) { + ret = -ENOMEM; + goto out; + } + } + } + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, &last_extent, + start_slot, ins_nr, 1, 0); + if (ret > 0) + ret = 0; + } +out: + btrfs_release_path(path); + btrfs_free_path(dst_path); + return ret; +} + static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, @@ -4256,6 +4360,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; + /* We log prealloc extents beyond eof later. */ + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && + em->start >= i_size_read(&inode->vfs_inode)) + continue; + if (em->start < logged_start) logged_start = em->start; if ((em->start + em->len - 1) > logged_end) @@ -4268,31 +4377,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, num++; } - /* - * Add all prealloc extents beyond the inode's i_size to make sure we - * don't lose them after doing a fast fsync and replaying the log. - */ - if (inode->flags & BTRFS_INODE_PREALLOC) { - struct rb_node *node; - - for (node = rb_last(&tree->map); node; node = rb_prev(node)) { - em = rb_entry(node, struct extent_map, rb_node); - if (em->start < i_size_read(&inode->vfs_inode)) - break; - if (!list_empty(&em->list)) - continue; - /* Same as above loop. */ - if (++num > 32768) { - list_del_init(&tree->modified_extents); - ret = -EFBIG; - goto process; - } - refcount_inc(&em->refs); - set_bit(EXTENT_FLAG_LOGGING, &em->flags); - list_add_tail(&em->list, &extents); - } - } - list_sort(NULL, &extents, extent_cmp); btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* @@ -4337,6 +4421,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, up_write(&inode->dio_sem); btrfs_release_path(path); + if (!ret) + ret = btrfs_log_prealloc_extents(trans, inode, path); + return ret; } -- GitLab From 1083a7e8130ca54cef63a5b1d6c86878d4b9e61d Mon Sep 17 00:00:00 2001 From: Prashanth Prakash Date: Fri, 27 Apr 2018 11:35:27 -0600 Subject: [PATCH 0176/1291] cpufreq / CPPC: Set platform specific transition_delay_us commit d4f3388afd488ed15368fa7413b8bd6d1f98bb1d upstream. Add support to specify platform specific transition_delay_us instead of using the transition delay derived from PCC. With commit 3d41386d556d (cpufreq: CPPC: Use transition_delay_us depending transition_latency) we are setting transition_delay_us directly and not applying the LATENCY_MULTIPLIER. Because of that, on Qualcomm Centriq we can end up with a very high rate of frequency change requests when using the schedutil governor (default rate_limit_us=10 compared to an earlier value of 10000). The PCC subspace describes the rate at which the platform can accept commands on the CPPC's PCC channel. This includes read and write command on the PCC channel that can be used for reasons other than frequency transitions. Moreover the same PCC subspace can be used by multiple freq domains and deriving transition_delay_us from it as we do now can be sub-optimal. Moreover if a platform does not use PCC for desired_perf register then there is no way to compute the transition latency or the delay_us. CPPC does not have a standard defined mechanism to get the transition rate or the latency at the moment. Given the above limitations, it is simpler to have a platform specific transition_delay_us and rely on PCC derived value only if a platform specific value is not available. Signed-off-by: Prashanth Prakash Cc: 4.14+ # 4.14+ Fixes: 3d41386d556d (cpufreq: CPPC: Use transition_delay_us depending transition_latency) Signed-off-by: Rafael J. Wysocki Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cppc_cpufreq.c | 46 ++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8b432d6e846d..c9ce716247c1 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -126,6 +126,49 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) cpu->perf_caps.lowest_perf, cpu_num, ret); } +/* + * The PCC subspace describes the rate at which platform can accept commands + * on the shared PCC channel (including READs which do not count towards freq + * trasition requests), so ideally we need to use the PCC values as a fallback + * if we don't have a platform specific transition_delay_us + */ +#ifdef CONFIG_ARM64 +#include + +static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_num = read_cpuid_part_number(); + unsigned int delay_us = 0; + + switch (implementor) { + case ARM_CPU_IMP_QCOM: + switch (part_num) { + case QCOM_CPU_PART_FALKOR_V1: + case QCOM_CPU_PART_FALKOR: + delay_us = 10000; + break; + default: + delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; + break; + } + break; + default: + delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; + break; + } + + return delay_us; +} + +#else + +static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +{ + return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; +} +#endif + static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { struct cppc_cpudata *cpu; @@ -163,8 +206,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.max_freq = cppc_dmi_max_khz; policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num); - policy->transition_delay_us = cppc_get_transition_latency(cpu_num) / - NSEC_PER_USEC; + policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num); policy->shared_type = cpu->shared_type; if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { -- GitLab From f5778c2d657e04782553ccc285e396f3062deb73 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Mar 2018 14:23:16 -0400 Subject: [PATCH 0177/1291] xprtrdma: Fix corner cases when handling device removal commit 25524288631fc5b7d33259fca1e0dc38146be5d6 upstream. Michal Kalderon has found some corner cases around device unload with active NFS mounts that I didn't have the imagination to test when xprtrdma device removal was added last year. - The ULP device removal handler is responsible for deallocating the PD. That wasn't clear to me initially, and my own testing suggested it was not necessary, but that is incorrect. - The transport destruction path can no longer assume that there is a valid ID. - When destroying a transport, ensure that ib_free_cq() is not invoked on a CQ that was already released. Reported-by: Michal Kalderon Fixes: bebd031866ca ("xprtrdma: Support unplugging an HCA from ...") Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Anna Schumaker Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/verbs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 97b9d4f671ac..2aaf46599126 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -270,7 +270,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) wait_for_completion(&ia->ri_remove_done); ia->ri_id = NULL; - ia->ri_pd = NULL; ia->ri_device = NULL; /* Return 1 to ensure the core destroys the id. */ return 1; @@ -464,7 +463,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) ia->ri_id->qp = NULL; } ib_free_cq(ep->rep_attr.recv_cq); + ep->rep_attr.recv_cq = NULL; ib_free_cq(ep->rep_attr.send_cq); + ep->rep_attr.send_cq = NULL; /* The ULP is responsible for ensuring all DMA * mappings and MRs are gone. @@ -477,6 +478,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); } rpcrdma_destroy_mrs(buf); + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; /* Allow waiters to continue */ complete(&ia->ri_remove_done); @@ -650,14 +653,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) cancel_delayed_work_sync(&ep->rep_connect_worker); - if (ia->ri_id->qp) { + if (ia->ri_id && ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } - ib_free_cq(ep->rep_attr.recv_cq); - ib_free_cq(ep->rep_attr.send_cq); + if (ep->rep_attr.recv_cq) + ib_free_cq(ep->rep_attr.recv_cq); + if (ep->rep_attr.send_cq) + ib_free_cq(ep->rep_attr.send_cq); } /* Re-establish a connection after a device removal event. -- GitLab From c59a8f13f36b51f2100111121b39c6d15eca124d Mon Sep 17 00:00:00 2001 From: alex chen Date: Wed, 15 Nov 2017 17:31:48 -0800 Subject: [PATCH 0178/1291] ocfs2: subsystem.su_mutex is required while accessing the item->ci_parent commit 853bc26a7ea39e354b9f8889ae7ad1492ffa28d2 upstream. The subsystem.su_mutex is required while accessing the item->ci_parent, otherwise, NULL pointer dereference to the item->ci_parent will be triggered in the following situation: add node delete node sys_write vfs_write configfs_write_file o2nm_node_store o2nm_node_local_write do_rmdir vfs_rmdir configfs_rmdir mutex_lock(&subsys->su_mutex); unlink_obj item->ci_group = NULL; item->ci_parent = NULL; to_o2nm_cluster_from_node node->nd_item.ci_parent->ci_parent BUG since of NULL pointer dereference to nd_item.ci_parent Moreover, the o2nm_cluster also should be protected by the subsystem.su_mutex. [alex.chen@huawei.com: v2] Link: http://lkml.kernel.org/r/59EEAA69.9080703@huawei.com Link: http://lkml.kernel.org/r/59E9B36A.10700@huawei.com Signed-off-by: Alex Chen Reviewed-by: Jun Piao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/cluster/nodemanager.c | 63 +++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index b17d180bdc16..c204ac9b49e5 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -40,6 +40,9 @@ char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { "panic", /* O2NM_FENCE_PANIC */ }; +static inline void o2nm_lock_subsystem(void); +static inline void o2nm_unlock_subsystem(void); + struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { struct o2nm_node *node = NULL; @@ -181,7 +184,10 @@ static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) { /* through the first node_set .parent * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */ - return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); + if (node->nd_item.ci_parent) + return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); + else + return NULL; } enum { @@ -194,7 +200,7 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); + struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; int ret = 0; @@ -214,6 +220,13 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ + o2nm_lock_subsystem(); + cluster = to_o2nm_cluster_from_node(node); + if (!cluster) { + o2nm_unlock_subsystem(); + return -EINVAL; + } + write_lock(&cluster->cl_nodes_lock); if (cluster->cl_nodes[tmp]) ret = -EEXIST; @@ -226,6 +239,8 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, set_bit(tmp, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); + o2nm_unlock_subsystem(); + if (ret) return ret; @@ -269,7 +284,7 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, size_t count) { struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); + struct o2nm_cluster *cluster; int ret, i; struct rb_node **p, *parent; unsigned int octets[4]; @@ -286,6 +301,13 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, be32_add_cpu(&ipv4_addr, octets[i] << (i * 8)); } + o2nm_lock_subsystem(); + cluster = to_o2nm_cluster_from_node(node); + if (!cluster) { + o2nm_unlock_subsystem(); + return -EINVAL; + } + ret = 0; write_lock(&cluster->cl_nodes_lock); if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) @@ -298,6 +320,8 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); } write_unlock(&cluster->cl_nodes_lock); + o2nm_unlock_subsystem(); + if (ret) return ret; @@ -315,7 +339,7 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, size_t count) { struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); + struct o2nm_cluster *cluster; unsigned long tmp; char *p = (char *)page; ssize_t ret; @@ -333,17 +357,26 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) return -EINVAL; /* XXX */ + o2nm_lock_subsystem(); + cluster = to_o2nm_cluster_from_node(node); + if (!cluster) { + ret = -EINVAL; + goto out; + } + /* the only failure case is trying to set a new local node * when a different one is already set */ if (tmp && tmp == cluster->cl_has_local && - cluster->cl_local_node != node->nd_num) - return -EBUSY; + cluster->cl_local_node != node->nd_num) { + ret = -EBUSY; + goto out; + } /* bring up the rx thread if we're setting the new local node. */ if (tmp && !cluster->cl_has_local) { ret = o2net_start_listening(node); if (ret) - return ret; + goto out; } if (!tmp && cluster->cl_has_local && @@ -358,7 +391,11 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, cluster->cl_local_node = node->nd_num; } - return count; + ret = count; + +out: + o2nm_unlock_subsystem(); + return ret; } CONFIGFS_ATTR(o2nm_node_, num); @@ -738,6 +775,16 @@ static struct o2nm_cluster_group o2nm_cluster_group = { }, }; +static inline void o2nm_lock_subsystem(void) +{ + mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex); +} + +static inline void o2nm_unlock_subsystem(void) +{ + mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex); +} + int o2nm_depend_item(struct config_item *item) { return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); -- GitLab From 1ccab2bf726e1cd9292deecf4d72d732527035d6 Mon Sep 17 00:00:00 2001 From: alex chen Date: Wed, 15 Nov 2017 17:31:44 -0800 Subject: [PATCH 0179/1291] ocfs2: ip_alloc_sem should be taken in ocfs2_get_block() commit 3e4c56d41eef5595035872a2ec5a483f42e8917f upstream. ip_alloc_sem should be taken in ocfs2_get_block() when reading file in DIRECT mode to prevent concurrent access to extent tree with ocfs2_dio_end_io_write(), which may cause BUGON in the following situation: read file 'A' end_io of writing file 'A' vfs_read __vfs_read ocfs2_file_read_iter generic_file_read_iter ocfs2_direct_IO __blockdev_direct_IO do_blockdev_direct_IO do_direct_IO get_more_blocks ocfs2_get_block ocfs2_extent_map_get_blocks ocfs2_get_clusters ocfs2_get_clusters_nocache() ocfs2_search_extent_list return the index of record which contains the v_cluster, that is v_cluster > rec[i]->e_cpos. ocfs2_dio_end_io ocfs2_dio_end_io_write down_write(&oi->ip_alloc_sem); ocfs2_mark_extent_written ocfs2_change_extent_flag ocfs2_split_extent ... --> modify the rec[i]->e_cpos, resulting in v_cluster < rec[i]->e_cpos. BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)) [alex.chen@huawei.com: v3] Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com Fixes: c15471f79506 ("ocfs2: fix sparse file & data ordering issue in direct io") Signed-off-by: Alex Chen Reviewed-by: Jun Piao Reviewed-by: Joseph Qi Reviewed-by: Gang He Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 88a31e9340a0..d1516327b787 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, return err; } +static int ocfs2_lock_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int ret = 0; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + down_read(&oi->ip_alloc_sem); + ret = ocfs2_get_block(inode, iblock, bh_result, create); + up_read(&oi->ip_alloc_sem); + + return ret; +} + int ocfs2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2128,7 +2141,7 @@ static void ocfs2_dio_free_write_ctx(struct inode *inode, * called like this: dio->get_blocks(dio->inode, fs_startblk, * fs_count, map_bh, dio->rw == WRITE); */ -static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, +static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -2154,12 +2167,9 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, * while file size will be changed. */ if (pos + total_len <= i_size_read(inode)) { - down_read(&oi->ip_alloc_sem); - /* This is the fast path for re-write. */ - ret = ocfs2_get_block(inode, iblock, bh_result, create); - - up_read(&oi->ip_alloc_sem); + /* This is the fast path for re-write. */ + ret = ocfs2_lock_get_block(inode, iblock, bh_result, create); if (buffer_mapped(bh_result) && !buffer_new(bh_result) && ret == 0) @@ -2424,9 +2434,9 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return 0; if (iov_iter_rw(iter) == READ) - get_block = ocfs2_get_block; + get_block = ocfs2_lock_get_block; else - get_block = ocfs2_dio_get_block; + get_block = ocfs2_dio_wr_get_block; return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, -- GitLab From b913a05ab75e806db1ef6d54fb206a40dd4d5f1b Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:15 +0200 Subject: [PATCH 0180/1291] bcm63xx_enet: correct clock usage commit 9c86b846ce02f7e35d7234cf090b80553eba5389 upstream. Check the return code of prepare_enable and change one last instance of enable only to prepare_enable. Also properly disable and release the clock in error paths and on remove for enetsw. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 4f3845a58126..b7d8b6600be6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1773,7 +1773,9 @@ static int bcm_enet_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out; } - clk_prepare_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk_mac; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1805,9 +1807,11 @@ static int bcm_enet_probe(struct platform_device *pdev) if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; - goto out_put_clk_mac; + goto out_disable_clk_mac; } - clk_prepare_enable(priv->phy_clk); + ret = clk_prepare_enable(priv->phy_clk); + if (ret) + goto out_put_clk_phy; } /* do minimal hardware init to be able to probe mii bus */ @@ -1901,13 +1905,16 @@ static int bcm_enet_probe(struct platform_device *pdev) out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) { + if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); + +out_put_clk_phy: + if (priv->phy_clk) clk_put(priv->phy_clk); - } -out_put_clk_mac: +out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); +out_put_clk_mac: clk_put(priv->mac_clk); out: free_netdev(dev); @@ -2752,7 +2759,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out_unmap; } - clk_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk; priv->rx_chan = 0; priv->tx_chan = 1; @@ -2773,7 +2782,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = register_netdev(dev); if (ret) - goto out_put_clk; + goto out_disable_clk; netif_carrier_off(dev); platform_set_drvdata(pdev, dev); @@ -2782,6 +2791,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) return 0; +out_disable_clk: + clk_disable_unprepare(priv->mac_clk); + out_put_clk: clk_put(priv->mac_clk); @@ -2813,6 +2825,9 @@ static int bcm_enetsw_remove(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); + clk_disable_unprepare(priv->mac_clk); + clk_put(priv->mac_clk); + free_netdev(dev); return 0; } -- GitLab From b1c3ce0cfff275ee2e88fe505a8f44712303f895 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:16 +0200 Subject: [PATCH 0181/1291] bcm63xx_enet: do not write to random DMA channel on BCM6345 commit d6213c1f2ad54a964b77471690264ed685718928 upstream. The DMA controller regs actually point to DMA channel 0, so the write to ENETDMA_CFG_REG will actually modify a random DMA channel. Since DMA controller registers do not exist on BCM6345, guard the write with the usual check for dma_has_sram. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index b7d8b6600be6..68470c7c630a 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1062,7 +1062,8 @@ static int bcm_enet_open(struct net_device *dev) val = enet_readl(priv, ENET_CTL_REG); val |= ENET_CTL_ENABLE_MASK; enet_writel(priv, val, ENET_CTL_REG); - enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); + if (priv->dma_has_sram) + enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); enet_dmac_writel(priv, priv->dma_chan_en_mask, ENETDMAC_CHANCFG, priv->rx_chan); -- GitLab From dc3782a3e9c61872df8c8ed395b60d3b7ad782b8 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 22 Jan 2018 11:28:54 +0900 Subject: [PATCH 0182/1291] PCI: exynos: Fix a potential init_clk_resources NULL pointer dereference commit b5d6bc90c9129279d363ccbc02ad11e7b657c0b4 upstream. In order to avoid triggering a NULL pointer dereference in exynos_pcie_probe() a check must be put in place to detect if the init_clk_resources hook is initialized before calling it. Add the respective function pointer check in exynos_pcie_probe(). Signed-off-by: Jaehoon Chung [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-exynos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 5596fdedbb94..ea03f1ec12a4 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -695,7 +695,8 @@ static int __init exynos_pcie_probe(struct platform_device *pdev) return ret; } - if (ep->ops && ep->ops->get_clk_resources) { + if (ep->ops && ep->ops->get_clk_resources && + ep->ops->init_clk_resources) { ret = ep->ops->get_clk_resources(ep); if (ret) return ret; -- GitLab From 996a6a393b3f82f306ec65f25756a31f51ca3967 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 25 Aug 2017 15:47:14 +0200 Subject: [PATCH 0183/1291] crypto: crypto4xx - remove bad list_del commit a728a196d253530f17da5c86dc7dfbe58c5f7094 upstream. alg entries are only added to the list, after the registration was successful. If the registration failed, it was never added to the list in the first place. Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 65dc78b91dea..3118cec0d81e 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -1033,12 +1033,10 @@ int crypto4xx_register_alg(struct crypto4xx_device *sec_dev, break; } - if (rc) { - list_del(&alg->entry); + if (rc) kfree(alg); - } else { + else list_add_tail(&alg->entry, &sec_dev->alg_list); - } } return 0; -- GitLab From 03bb9187754e2c82626dafcd3e12a137cbb952ce Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 25 Aug 2017 15:47:24 +0200 Subject: [PATCH 0184/1291] crypto: crypto4xx - fix crypto4xx_build_pdr, crypto4xx_build_sdr leak commit 5d59ad6eea82ef8df92b4109615a0dde9d8093e9 upstream. If one of the later memory allocations in rypto4xx_build_pdr() fails: dev->pdr (and/or) dev->pdr_uinfo wouldn't be freed. crypto4xx_build_sdr() has the same issue with dev->sdr. Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 3118cec0d81e..3f9eee7e555f 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -207,7 +207,7 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev) dev->pdr_pa); return -ENOMEM; } - memset(dev->pdr, 0, sizeof(struct ce_pd) * PPC4XX_NUM_PD); + memset(dev->pdr, 0, sizeof(struct ce_pd) * PPC4XX_NUM_PD); dev->shadow_sa_pool = dma_alloc_coherent(dev->core_dev->device, 256 * PPC4XX_NUM_PD, &dev->shadow_sa_pool_pa, @@ -240,13 +240,15 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev) static void crypto4xx_destroy_pdr(struct crypto4xx_device *dev) { - if (dev->pdr != NULL) + if (dev->pdr) dma_free_coherent(dev->core_dev->device, sizeof(struct ce_pd) * PPC4XX_NUM_PD, dev->pdr, dev->pdr_pa); + if (dev->shadow_sa_pool) dma_free_coherent(dev->core_dev->device, 256 * PPC4XX_NUM_PD, dev->shadow_sa_pool, dev->shadow_sa_pool_pa); + if (dev->shadow_sr_pool) dma_free_coherent(dev->core_dev->device, sizeof(struct sa_state_record) * PPC4XX_NUM_PD, @@ -416,12 +418,12 @@ static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev) static void crypto4xx_destroy_sdr(struct crypto4xx_device *dev) { - if (dev->sdr != NULL) + if (dev->sdr) dma_free_coherent(dev->core_dev->device, sizeof(struct ce_sd) * PPC4XX_NUM_SD, dev->sdr, dev->sdr_pa); - if (dev->scatter_buffer_va != NULL) + if (dev->scatter_buffer_va) dma_free_coherent(dev->core_dev->device, dev->scatter_buffer_size * PPC4XX_NUM_SD, dev->scatter_buffer_va, @@ -1191,7 +1193,7 @@ static int crypto4xx_probe(struct platform_device *ofdev) rc = crypto4xx_build_gdr(core_dev->dev); if (rc) - goto err_build_gdr; + goto err_build_pdr; rc = crypto4xx_build_sdr(core_dev->dev); if (rc) @@ -1234,12 +1236,11 @@ static int crypto4xx_probe(struct platform_device *ofdev) err_request_irq: irq_dispose_mapping(core_dev->irq); tasklet_kill(&core_dev->tasklet); - crypto4xx_destroy_sdr(core_dev->dev); err_build_sdr: + crypto4xx_destroy_sdr(core_dev->dev); crypto4xx_destroy_gdr(core_dev->dev); -err_build_gdr: - crypto4xx_destroy_pdr(core_dev->dev); err_build_pdr: + crypto4xx_destroy_pdr(core_dev->dev); kfree(core_dev->dev); err_alloc_dev: kfree(core_dev); -- GitLab From c62e2f087af11564ecbd1c0d08bbd84ac92afc07 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 29 Jun 2018 17:51:26 +0200 Subject: [PATCH 0185/1291] alx: take rtnl before calling __alx_open from resume [ Upstream commit bc800e8b39bad60ccdb83be828da63af71ab87b3 ] The __alx_open function can be called from ndo_open, which is called under RTNL, or from alx_resume, which isn't. Since commit d768319cd427, we're calling the netif_set_real_num_{tx,rx}_queues functions, which need to be called under RTNL. This is similar to commit 0c2cc02e571a ("igb: Move the calls to set the Tx and Rx queues into igb_open"). Fixes: d768319cd427 ("alx: enable multiple tx queues") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/alx/main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 567ee54504bc..5e5022fa1d04 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1897,13 +1897,19 @@ static int alx_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct alx_priv *alx = pci_get_drvdata(pdev); struct alx_hw *hw = &alx->hw; + int err; alx_reset_phy(hw); if (!netif_running(alx->dev)) return 0; netif_device_attach(alx->dev); - return __alx_open(alx, true); + + rtnl_lock(); + err = __alx_open(alx, true); + rtnl_unlock(); + + return err; } static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); -- GitLab From f93d65939a4a80f31e50af88bbd5fcae33266009 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 16 Jun 2018 11:55:44 +0100 Subject: [PATCH 0186/1291] atm: Preserve value of skb->truesize when accounting to vcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9bbe60a67be5a1c6f79b3c9be5003481a50529ff ] ATM accounts for in-flight TX packets in sk_wmem_alloc of the VCC on which they are to be sent. But it doesn't take ownership of those packets from the sock (if any) which originally owned them. They should remain owned by their actual sender until they've left the box. There's a hack in pskb_expand_head() to avoid adjusting skb->truesize for certain skbs, precisely to avoid messing up sk_wmem_alloc accounting. Ideally that hack would cover the ATM use case too, but it doesn't — skbs which aren't owned by any sock, for example PPP control frames, still get their truesize adjusted when the low-level ATM driver adds headroom. This has always been an issue, it seems. The truesize of a packet increases, and sk_wmem_alloc on the VCC goes negative. But this wasn't for normal traffic, only for control frames. So I think we just got away with it, and we probably needed to send 2GiB of LCP echo frames before the misaccounting would ever have caused a problem and caused atm_may_send() to start refusing packets. Commit 14afee4b609 ("net: convert sock.sk_wmem_alloc from atomic_t to refcount_t") did exactly what it was intended to do, and turned this mostly-theoretical problem into a real one, causing PPPoATM to fail immediately as sk_wmem_alloc underflows and atm_may_send() *immediately* starts refusing to allow new packets. The least intrusive solution to this problem is to stash the value of skb->truesize that was accounted to the VCC, in a new member of the ATM_SKB(skb) structure. Then in atm_pop_raw() subtract precisely that value instead of the then-current value of skb->truesize. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: David Woodhouse Tested-by: Kevin Darbyshire-Bryant Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/atmdev.h | 15 +++++++++++++++ net/atm/br2684.c | 3 +-- net/atm/clip.c | 3 +-- net/atm/common.c | 3 +-- net/atm/lec.c | 3 +-- net/atm/mpc.c | 3 +-- net/atm/pppoatm.c | 3 +-- net/atm/raw.c | 4 ++-- 8 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 0c27515d2cf6..8124815eb121 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -214,6 +214,7 @@ struct atmphy_ops { struct atm_skb_data { struct atm_vcc *vcc; /* ATM VCC */ unsigned long atm_options; /* ATM layer options */ + unsigned int acct_truesize; /* truesize accounted to vcc */ }; #define VCC_HTABLE_SIZE 32 @@ -241,6 +242,20 @@ void vcc_insert_socket(struct sock *sk); void atm_dev_release_vccs(struct atm_dev *dev); +static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + /* + * Because ATM skbs may not belong to a sock (and we don't + * necessarily want to), skb->truesize may be adjusted, + * escaping the hack in pskb_expand_head() which avoids + * doing so for some cases. So stash the value of truesize + * at the time we accounted it, and atm_pop_raw() can use + * that value later, in case it changes. + */ + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + ATM_SKB(skb)->acct_truesize = skb->truesize; + ATM_SKB(skb)->atm_options = vcc->atm_options; +} static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 4e111196f902..bc21f8e8daf2 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); - refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = atmvcc->atm_options; + atm_account_tx(atmvcc, skb); dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/atm/clip.c b/net/atm/clip.c index 65f706e4344c..60920a42f640 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, memcpy(here, llc_oui, sizeof(llc_oui)); ((__be16 *) here)[3] = skb->protocol; } - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); entry->vccs->last_use = jiffies; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ diff --git a/net/atm/common.c b/net/atm/common.c index 8a4f99114cd2..9e812c782a37 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) goto out; } pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - refcount_add(skb->truesize, &sk->sk_wmem_alloc); + atm_account_tx(vcc, skb); skb->dev = NULL; /* for paths shared with net_device interfaces */ - ATM_SKB(skb)->atm_options = vcc->atm_options; if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { kfree_skb(skb); error = -EFAULT; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5741b6474dd9..9f2365694ad4 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) struct net_device *dev = skb->dev; ATM_SKB(skb)->vcc = vcc; - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { dev->stats.tx_dropped++; return; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 5677147209e8..db9a1838687c 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) sizeof(struct llc_snap_hdr)); } - refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; + atm_account_tx(entry->shortcut, skb); entry->shortcut->send(entry->shortcut, skb); entry->packets_fwded++; mpc->in_ops->put(entry); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 21d9d341a619..af8c4b38b746 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) return 1; } - refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; + atm_account_tx(vcc, skb); pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) diff --git a/net/atm/raw.c b/net/atm/raw.c index ee10e8d46185..b3ba44aab0ee 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("(%d) %d -= %d\n", - vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); + vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); + WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } -- GitLab From 43c9207d029648dce52a45cd07dfb832d8a7957a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 29 Jun 2018 13:28:07 -0500 Subject: [PATCH 0187/1291] atm: zatm: Fix potential Spectre v1 [ Upstream commit ced9e191501e52b95e1b57b8e0db00943869eed0 ] pool can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/atm/zatm.c:1491 zatm_ioctl() warn: potential spectre issue 'zatm_dev->pool_info' (local cap) Fix this by sanitizing pool before using it to index zatm_dev->pool_info Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/atm/zatm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index a8d2eb0ceb8d..2c288d1f42bb 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) return -EFAULT; if (pool < 0 || pool > ZATM_LAST_POOL) return -EINVAL; + pool = array_index_nospec(pool, + ZATM_LAST_POOL + 1); if (copy_from_user(&info, &((struct zatm_pool_req __user *) arg)->info, sizeof(info))) return -EFAULT; -- GitLab From e34e92d8b689179a6715518346a687bc1e87911a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Jun 2018 14:07:16 -0700 Subject: [PATCH 0188/1291] hv_netvsc: split sub-channel setup into async and sync [ Upstream commit 3ffe64f1a641b80a82d9ef4efa7a05ce69049871 ] When doing device hotplug the sub channel must be async to avoid deadlock issues because device is discovered in softirq context. When doing changes to MTU and number of channels, the setup must be synchronous to avoid races such as when MTU and device settings are done in a single ip command. Reported-by: Thomas Walker Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Fixes: 732e49850c5e ("netvsc: fix race on sub channel creation") Signed-off-by: Stephen Hemminger Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++- drivers/net/hyperv/netvsc_drv.c | 17 ++++++++- drivers/net/hyperv/rndis_filter.c | 61 ++++++------------------------- 4 files changed, 65 insertions(+), 52 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 01017dd88802..cb250cacf721 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -207,7 +207,7 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); -void rndis_set_subchannel(struct work_struct *w); +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4647ecbe6f36..701be5d81062 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -62,6 +62,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) VM_PKT_DATA_INBAND, 0); } +/* Worker to setup sub channels on initial setup + * Initial hotplug event occurs in softirq context + * and can't wait for channels. + */ +static void netvsc_subchan_work(struct work_struct *w) +{ + struct netvsc_device *nvdev = + container_of(w, struct netvsc_device, subchan_work); + struct rndis_device *rdev; + int i, ret; + + /* Avoid deadlock with device removal already under RTNL */ + if (!rtnl_trylock()) { + schedule_work(w); + return; + } + + rdev = nvdev->extension; + if (rdev) { + ret = rndis_set_subchannel(rdev->ndev, nvdev); + if (ret == 0) { + netif_device_attach(rdev->ndev); + } else { + /* fallback to only primary channel */ + for (i = 1; i < nvdev->num_chn; i++) + netif_napi_del(&nvdev->chan_table[i].napi); + + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + + rtnl_unlock(); +} + static struct netvsc_device *alloc_net_device(void) { struct netvsc_device *net_device; @@ -78,7 +113,7 @@ static struct netvsc_device *alloc_net_device(void) init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); - INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); + INIT_WORK(&net_device->subchan_work, netvsc_subchan_work); return net_device; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 6890478a0851..aeabeb107fed 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -911,8 +911,20 @@ static int netvsc_attach(struct net_device *ndev, if (IS_ERR(nvdev)) return PTR_ERR(nvdev); - /* Note: enable and attach happen when sub-channels setup */ + if (nvdev->num_chn > 1) { + ret = rndis_set_subchannel(ndev, nvdev); + + /* if unavailable, just proceed with one queue */ + if (ret) { + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + + /* In any case device is now ready */ + netif_device_attach(ndev); + /* Note: enable and attach happen when sub-channels setup */ netif_carrier_off(ndev); if (netif_running(ndev)) { @@ -2035,6 +2047,9 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + if (nvdev->num_chn > 1) + schedule_work(&nvdev->subchan_work); + /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | NETIF_F_HIGHDMA | NETIF_F_SG | diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index d1ae184008b4..cb03a6ea076a 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1055,29 +1055,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) * This breaks overlap of processing the host message for the * new primary channel with the initialization of sub-channels. */ -void rndis_set_subchannel(struct work_struct *w) +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) { - struct netvsc_device *nvdev - = container_of(w, struct netvsc_device, subchan_work); struct nvsp_message *init_packet = &nvdev->channel_init_pkt; - struct net_device_context *ndev_ctx; - struct rndis_device *rdev; - struct net_device *ndev; - struct hv_device *hv_dev; + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hv_dev = ndev_ctx->device_ctx; + struct rndis_device *rdev = nvdev->extension; int i, ret; - if (!rtnl_trylock()) { - schedule_work(w); - return; - } - - rdev = nvdev->extension; - if (!rdev) - goto unlock; /* device was removed */ - - ndev = rdev->ndev; - ndev_ctx = netdev_priv(ndev); - hv_dev = ndev_ctx->device_ctx; + ASSERT_RTNL(); memset(init_packet, 0, sizeof(struct nvsp_message)); init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; @@ -1091,13 +1077,13 @@ void rndis_set_subchannel(struct work_struct *w) VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret) { netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); - goto failed; + return ret; } wait_for_completion(&nvdev->channel_init_wait); if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { netdev_err(ndev, "sub channel request failed\n"); - goto failed; + return -EIO; } nvdev->num_chn = 1 + @@ -1116,21 +1102,7 @@ void rndis_set_subchannel(struct work_struct *w) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ndev_ctx->tx_table[i] = i % nvdev->num_chn; - netif_device_attach(ndev); - rtnl_unlock(); - return; - -failed: - /* fallback to only primary channel */ - for (i = 1; i < nvdev->num_chn; i++) - netif_napi_del(&nvdev->chan_table[i].napi); - - nvdev->max_chn = 1; - nvdev->num_chn = 1; - - netif_device_attach(ndev); -unlock: - rtnl_unlock(); + return 0; } static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, @@ -1321,21 +1293,12 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, netif_napi_add(net, &net_device->chan_table[i].napi, netvsc_poll, NAPI_POLL_WEIGHT); - if (net_device->num_chn > 1) - schedule_work(&net_device->subchan_work); + return net_device; out: - /* if unavailable, just proceed with one queue */ - if (ret) { - net_device->max_chn = 1; - net_device->num_chn = 1; - } - - /* No sub channels, device is ready */ - if (net_device->num_chn == 1) - netif_device_attach(net); - - return net_device; + /* setting up multiple channels failed */ + net_device->max_chn = 1; + net_device->num_chn = 1; err_dev_remv: rndis_filter_device_remove(dev, net_device); -- GitLab From d10c0baaae3f84185db3b3076a87d4343c9d62e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 30 Jun 2018 15:26:56 -0700 Subject: [PATCH 0189/1291] ipv6: sr: fix passing wrong flags to crypto_alloc_shash() [ Upstream commit fc9c2029e37c3ae9efc28bf47045e0b87e09660c ] The 'mask' argument to crypto_alloc_shash() uses the CRYPTO_ALG_* flags, not 'gfp_t'. So don't pass GFP_KERNEL to it. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Signed-off-by: Eric Biggers Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_hmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 33fb35cbfac1..558fe8cc6d43 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void) return -ENOMEM; for_each_possible_cpu(cpu) { - tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + tfm = crypto_alloc_shash(algo->name, 0, 0); if (IS_ERR(tfm)) return PTR_ERR(tfm); p_tfm = per_cpu_ptr(algo->tfms, cpu); -- GitLab From f5a42d63f0d417654a8a4d4448015e1c642ee338 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 21 Jun 2018 12:56:04 +0800 Subject: [PATCH 0190/1291] ipvlan: fix IFLA_MTU ignored on NEWLINK [ Upstream commit 30877961b1cdd6fdca783c2e8c4f0f47e95dc58c ] Commit 296d48568042 ("ipvlan: inherit MTU from master device") adjusted the mtu from the master device when creating a ipvlan device, but it would also override the mtu value set in rtnl_create_link. It causes IFLA_MTU param not to take effect. So this patch is to not adjust the mtu if IFLA_MTU param is set when creating a ipvlan device. Fixes: 296d48568042 ("ipvlan: inherit MTU from master device") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index c74893c1e620..e7f7a1a002ee 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -546,7 +546,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->dev = dev; ipvlan->port = port; ipvlan->sfeatures = IPVLAN_FEATURES; - ipvlan_adjust_mtu(ipvlan, phy_dev); + if (!tb[IFLA_MTU]) + ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); /* If the port-id base is at the MAX value, then wrap it around and -- GitLab From a2e53d69f68587d5272d45e2f4674746977bd5b2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jun 2018 17:39:48 +0200 Subject: [PATCH 0191/1291] ixgbe: split XDP_TX tail and XDP_REDIRECT map flushing [ Upstream commit ad088ec480768850db019a5cc543685e868a513d ] The driver was combining the XDP_TX tail flush and XDP_REDIRECT map flushing (xdp_do_flush_map). This is suboptimal, these two flush operations should be kept separate. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 20a8018d41ef..b68d94b49a8a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2211,9 +2211,10 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, return skb; } -#define IXGBE_XDP_PASS 0 -#define IXGBE_XDP_CONSUMED 1 -#define IXGBE_XDP_TX 2 +#define IXGBE_XDP_PASS 0 +#define IXGBE_XDP_CONSUMED BIT(0) +#define IXGBE_XDP_TX BIT(1) +#define IXGBE_XDP_REDIR BIT(2) static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, struct xdp_buff *xdp); @@ -2242,7 +2243,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_REDIRECT: err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); if (!err) - result = IXGBE_XDP_TX; + result = IXGBE_XDP_REDIR; else result = IXGBE_XDP_CONSUMED; break; @@ -2302,7 +2303,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); - bool xdp_xmit = false; + unsigned int xdp_xmit = 0; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -2342,8 +2343,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } if (IS_ERR(skb)) { - if (PTR_ERR(skb) == -IXGBE_XDP_TX) { - xdp_xmit = true; + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) { + xdp_xmit |= xdp_res; ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size); } else { rx_buffer->pagecnt_bias++; @@ -2415,7 +2418,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_packets++; } - if (xdp_xmit) { + if (xdp_xmit & IXGBE_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_xmit & IXGBE_XDP_TX) { struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; /* Force memory writes to complete before letting h/w @@ -2423,8 +2429,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ wmb(); writel(ring->next_to_use, ring->tail); - - xdp_do_flush_map(); } u64_stats_update_begin(&rx_ring->syncp); -- GitLab From a3225a836e56487b32f21ff1a8099c731256d10a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Jun 2018 06:44:14 -0700 Subject: [PATCH 0192/1291] net: dccp: avoid crash in ccid3_hc_rx_send_feedback() [ Upstream commit 74174fe5634ffbf645a7ca5a261571f700b2f332 ] On fast hosts or malicious bots, we trigger a DCCP_BUG() which seems excessive. syzbot reported : BUG: delta (-6195) <= 0 at net/dccp/ccids/ccid3.c:628/ccid3_hc_rx_send_feedback() CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.18.0-rc1+ #112 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 ccid3_hc_rx_send_feedback net/dccp/ccids/ccid3.c:628 [inline] ccid3_hc_rx_packet_recv.cold.16+0x38/0x71 net/dccp/ccids/ccid3.c:793 ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline] dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180 dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378 dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654 sk_backlog_rcv include/net/sock.h:914 [inline] __sk_receive_skb+0x3ba/0xd80 net/core/sock.c:517 dccp_v4_rcv+0x10f9/0x1f58 net/dccp/ipv4.c:875 ip_local_deliver_finish+0x2eb/0xda0 net/ipv4/ip_input.c:215 NF_HOOK include/linux/netfilter.h:287 [inline] ip_local_deliver+0x1e9/0x750 net/ipv4/ip_input.c:256 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x823/0x2220 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:287 [inline] ip_rcv+0xa18/0x1284 net/ipv4/ip_input.c:492 __netif_receive_skb_core+0x2488/0x3680 net/core/dev.c:4628 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 process_backlog+0x219/0x760 net/core/dev.c:5373 napi_poll net/core/dev.c:5771 [inline] net_rx_action+0x7da/0x1980 net/core/dev.c:5837 __do_softirq+0x2e8/0xb17 kernel/softirq.c:284 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid3.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 119c04317d48..b913ee062a81 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -624,9 +624,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, case CCID3_FBACK_PERIODIC: delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); if (delta <= 0) - DCCP_BUG("delta (%ld) <= 0", (long)delta); - else - hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); + delta = 1; + hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); break; default: return; -- GitLab From fb6b14663d56afabf86a3a2078c37a8d28fc718a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Jun 2018 06:44:15 -0700 Subject: [PATCH 0193/1291] net: dccp: switch rx_tstamp_last_feedback to monotonic clock [ Upstream commit 0ce4e70ff00662ad7490e545ba0cd8c1fa179fca ] To compute delays, better not use time of the day which can be changed by admins or malicious programs. Also change ccid3_first_li() to use s64 type for delta variable to avoid potential overflows. Signed-off-by: Eric Dumazet Cc: Gerrit Renker Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid3.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index b913ee062a81..03fcf3ee1534 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -599,7 +599,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - ktime_t now = ktime_get_real(); + ktime_t now = ktime_get(); s64 delta = 0; switch (fbtype) { @@ -631,7 +631,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, return; } - ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, + ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta, hc->rx_x_recv, hc->rx_pinv); hc->rx_tstamp_last_feedback = now; @@ -678,7 +678,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - u32 x_recv, p, delta; + u32 x_recv, p; + s64 delta; u64 fval; if (hc->rx_rtt == 0) { @@ -686,7 +687,9 @@ static u32 ccid3_first_li(struct sock *sk) hc->rx_rtt = DCCP_FALLBACK_RTT; } - delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback)); + delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback); + if (delta <= 0) + delta = 1; x_recv = scaled_div32(hc->rx_bytes_recv, delta); if (x_recv == 0) { /* would also trigger divide-by-zero */ DCCP_WARN("X_recv==0\n"); -- GitLab From b364a914c4993f46c0d849dab20d67ba0d82809d Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sat, 30 Jun 2018 17:38:55 +0200 Subject: [PATCH 0194/1291] net: fix use-after-free in GRO with ESP [ Upstream commit 603d4cf8fe095b1ee78f423d514427be507fb513 ] Since the addition of GRO for ESP, gro_receive can consume the skb and return -EINPROGRESS. In that case, the lower layer GRO handler cannot touch the skb anymore. Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted some of the gro_receive handlers that can lead to ESP's gro_receive so that they wouldn't access the skb when -EINPROGRESS is returned, but missed other spots, mainly in tunneling protocols. This patch finishes the conversion to using skb_gro_flush_final(), and adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and GUE. Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 4 +--- include/linux/netdevice.h | 20 ++++++++++++++++++++ net/8021q/vlan.c | 2 +- net/ipv4/fou.c | 4 +--- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- 7 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index fbc825ac97ab..cb51448389a1 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -474,7 +474,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3d9c5b35a4a7..bbdb46916dc3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, flush = 0; out: - skb_gro_remcsum_cleanup(skb, &grc); - skb->remcsum_offload = 0; - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final_remcsum(skb, pp, flush, &grc); return pp; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46bf7cc7d5d5..2ea7ee1fb495 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2668,11 +2668,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, if (PTR_ERR(pp) != -EINPROGRESS) NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} #else static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} #endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index cf2e70003a53..cf82d970b0e4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -664,7 +664,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 1540db65241a..c9ec1603666b 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -448,9 +448,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, &grc); - skb->remcsum_offload = 0; + skb_gro_flush_final_remcsum(skb, pp, flush, &grc); return pp; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 1859c473b21a..6a7d980105f6 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ea6e6e7df0ee..cde2719fcb89 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -295,7 +295,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } EXPORT_SYMBOL(udp_gro_receive); -- GitLab From f389c17b8dc54b474d33728e5882dcc276a467fa Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 20 Jun 2018 17:04:20 +0530 Subject: [PATCH 0195/1291] net: macb: Fix ptp time adjustment for large negative delta [ Upstream commit 64d7839af8c8f67daaf9bf387135052c55d85f90 ] When delta passed to gem_ptp_adjtime is negative, the sign is maintained in the ns_to_timespec64 conversion. Hence timespec_add should be used directly. timespec_sub will just subtract the negative value thus increasing the time difference. Signed-off-by: Harini Katakam Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_ptp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 2220c771092b..678835136bf8 100755 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -170,10 +170,7 @@ static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) if (delta > TSU_NSEC_MAX_VAL) { gem_tsu_get_time(&bp->ptp_clock_info, &now); - if (sign) - now = timespec64_sub(now, then); - else - now = timespec64_add(now, then); + now = timespec64_add(now, then); gem_tsu_set_time(&bp->ptp_clock_info, (const struct timespec64 *)&now); -- GitLab From 1d8dda4444fa0e8799a40d74e3502aac841ea002 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:32:56 +0300 Subject: [PATCH 0196/1291] net/mlx5e: Avoid dealing with vport representors if not being e-switch manager [ Upstream commit 733d3e5497070d05971352ca5087bac83c197c3d ] In smartnic env, the host (PF) driver might not be an e-switch manager, hence the switchdev mode representors are running on the embedded cpu (EC) and not at the host. As such, we should avoid dealing with vport representors if not being esw manager. While here, make sure to disallow eswitch switchdev related setups through devlink if we are not esw managers. Fixes: cb67b832921c ('net/mlx5e: Introduce SRIOV VF representors') Signed-off-by: Or Gerlitz Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 337ce9423794..bf34264c734b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2626,7 +2626,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2637,7 +2637,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -4127,7 +4127,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) - if (MLX5_VPORT_MANAGER(mdev)) + if (MLX5_ESWITCH_MANAGER(mdev)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif @@ -4273,7 +4273,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) @@ -4300,7 +4300,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_unregister_vport_reps(priv); mlx5e_disable_async_events(priv); @@ -4483,7 +4483,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; #ifdef CONFIG_MLX5_ESWITCH - if (MLX5_VPORT_MANAGER(mdev)) { + if (MLX5_ESWITCH_MANAGER(mdev)) { rpriv = mlx5e_alloc_nic_rep_priv(mdev); if (!rpriv) { mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4727e7390834..3f6a7ca1cee8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -710,7 +710,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; - if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; rep = rpriv->rep; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d9fd8570b07c..c699055c0ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -912,8 +912,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return -EOPNOTSUPP; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; + if(!MLX5_ESWITCH_MANAGER(dev)) + return -EPERM; if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; -- GitLab From b216867c02ac1146f3b66768845814150732f1e2 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 4 Jun 2018 19:46:53 +0300 Subject: [PATCH 0197/1291] net/mlx5e: Don't attempt to dereference the ppriv struct if not being eswitch manager [ Upstream commit 8ffd569aaa818f2624ca821d9a246342fa8b8c50 ] The check for cpu hit statistics was not returning immediate false for any non vport rep netdev and hence we crashed (say on mlx5 probed VFs) if user-space tool was calling into any possible netdev in the system. Fix that by doing a proper check before dereferencing. Fixes: 1d447a39142e ('net/mlx5e: Extendable vport representor netdev private data') Signed-off-by: Or Gerlitz Reported-by: Eli Cohen Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3f6a7ca1cee8..0e70cd6d8bc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -724,8 +724,12 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_eswitch_rep *rep; + if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) + return false; + + rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; -- GitLab From c3994f4f8bdaf660ad6740347d0de7acdeb4680a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:16:18 +0300 Subject: [PATCH 0198/1291] net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager [ Upstream commit 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c ] In smartnic env, the host (PF) driver might not be an e-switch manager, hence the FW will err on driver attempts to deal with setting/unsetting the eswitch and as a result the overall setup of sriov will fail. Fix that by avoiding the operation if e-switch management is not allowed for this driver instance. While here, move to use the correct name for the esw manager capability name. Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support') Signed-off-by: Or Gerlitz Reported-by: Guy Kushnir Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 7 ++++++- include/linux/mlx5/mlx5_ifc.h | 2 +- 7 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0e70cd6d8bc8..281911698f72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -726,7 +726,7 @@ static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; - if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; rep = rpriv->rep; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 82e37250ed01..667415301066 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1535,7 +1535,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (!ESW_ALLOWED(esw)) return 0; - if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + if (!MLX5_ESWITCH_MANAGER(esw->dev) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 565c8b7a399a..10bf770675f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -39,6 +39,8 @@ #include #include "lib/mpfs.h" +#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) + enum { SRIOV_NONE, SRIOV_LEGACY, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 33e5ff081e36..dd05cf148845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" +#include "eswitch.h" #include "diag/fs_tracepoint.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ @@ -2211,7 +2212,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + if (MLX5_ESWITCH_MANAGER(dev)) { if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { err = init_fdb_root_ns(steering); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 2c71557d1cee..d69897a1e2ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -34,6 +34,7 @@ #include #include #include "mlx5_core.h" +#include "eswitch.h" #include "../../mlxfw/mlxfw.h" static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, @@ -152,13 +153,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) } if (MLX5_CAP_GEN(dev, vport_group_manager) && - MLX5_CAP_GEN(dev, eswitch_flow_table)) { + MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; } - if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 2a8b529ce6dd..a0674962f02c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return -EBUSY; } + if (!MLX5_ESWITCH_MANAGER(dev)) + goto enable_vfs_hca; + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { mlx5_core_warn(dev, @@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return err; } +enable_vfs_hca: for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); if (err) { @@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) } out: - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + if (MLX5_ESWITCH_MANAGER(dev)) + mlx5_eswitch_disable_sriov(dev->priv.eswitch); if (mlx5_wait_for_vf_pages(dev)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3765155fa4d..1d793d86d55f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -857,7 +857,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_1a4[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 eswitch_flow_table[0x1]; + u8 eswitch_manager[0x1]; u8 early_vf_enable[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; -- GitLab From 075b5038124851d8fc79c68cb74680b20fbd2736 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 12 Jun 2018 16:14:31 +0300 Subject: [PATCH 0199/1291] net/mlx5: Fix command interface race in polling mode [ Upstream commit d412c31dae053bf30a1bc15582a9990df297a660 ] The command interface can work in two modes: Events and Polling. In the general case, each time we invoke a command, a work is queued to handle it. When working in events, the interrupt handler completes the command execution. On the other hand, when working in polling mode, the work itself completes it. Due to a bug in the work handler, a command could have been completed by the interrupt handler, while the work handler hasn't finished yet, causing the it to complete once again if the command interface mode was changed from Events to polling after the interrupt handler was called. mlx5_unload_one() mlx5_stop_eqs() // Destroy the EQ before cmd EQ ...cmd_work_handler() write_doorbell() --> EVENT_TYPE_CMD mlx5_cmd_comp_handler() // First free free_ent(cmd, ent->idx) complete(&ent->done) <-- mlx5_stop_eqs //cmd was complete // move to polling before destroying the last cmd EQ mlx5_cmd_use_polling() cmd->mode = POLL; --> cmd_work_handler (continues) if (cmd->mode == POLL) mlx5_cmd_comp_handler() // Double free The solution is to store the cmd->mode before writing the doorbell. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3efe45bc2471..c5b649316bc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -801,6 +801,7 @@ static void cmd_work_handler(struct work_struct *work) unsigned long flags; bool poll_cmd = ent->polling; int alloc_ret; + int cmd_mode; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -847,6 +848,7 @@ static void cmd_work_handler(struct work_struct *work) set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + cmd_mode = cmd->mode; if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); @@ -871,7 +873,7 @@ static void cmd_work_handler(struct work_struct *work) iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); mmiowb(); /* if not in polling don't use ent after this point */ - if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { + if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); -- GitLab From 8b7b5f76693c865fd85715e0c047719766eea2be Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Fri, 25 May 2018 20:25:59 +0300 Subject: [PATCH 0200/1291] net/mlx5: Fix incorrect raw command length parsing [ Upstream commit 603b7bcff824740500ddfa001d7a7168b0b38542 ] The NULL character was not set correctly for the string containing the command length, this caused failures reading the output of the command due to a random length. The fix is to initialize the output length string. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c5b649316bc1..cf94fdf25155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1274,7 +1274,7 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, { struct mlx5_core_dev *dev = filp->private_data; struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - char outlen_str[8]; + char outlen_str[8] = {0}; int outlen; void *ptr; int err; @@ -1289,8 +1289,6 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, if (copy_from_user(outlen_str, buf, count)) return -EFAULT; - outlen_str[7] = 0; - err = sscanf(outlen_str, "%d", &outlen); if (err < 0) return err; -- GitLab From 46ff2bc7aeb1b3d2fc36b20ddda0411889165cf1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 13 Jun 2018 10:27:34 +0300 Subject: [PATCH 0201/1291] net/mlx5: Fix required capability for manipulating MPFS [ Upstream commit f811980444ec59ad62f9e041adbb576a821132c7 ] Manipulating of the MPFS requires eswitch manager capabilities. Fixes: eeb66cdb6826 ('net/mlx5: Separate between E-Switch and MPFS') Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 7cb67122e8b5..22811ecd8fcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -34,6 +34,7 @@ #include #include #include "mlx5_core.h" +#include "eswitch.h" #include "lib/mpfs.h" /* HW L2 Table (MPFS) management */ @@ -98,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); struct mlx5_mpfs *mpfs; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); @@ -122,7 +123,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return; WARN_ON(!hlist_empty(mpfs->hash)); @@ -137,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) u32 index; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mutex_lock(&mpfs->lock); @@ -179,7 +180,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mutex_lock(&mpfs->lock); -- GitLab From 7ae129dd6778aa9b573b17083b13674d4336fa22 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 22 May 2018 14:14:02 +0300 Subject: [PATCH 0202/1291] net/mlx5: Fix wrong size allocation for QoS ETC TC regitster [ Upstream commit d14fcb8d877caf1b8d6bd65d444bf62b21f2070c ] The driver allocates wrong size (due to wrong struct name) when issuing a query/set request to NIC's register. Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Shay Agroskin Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e07061f565d6..ccb6287aeeb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -641,7 +641,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) return -EOPNOTSUPP; @@ -653,7 +653,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) return -EOPNOTSUPP; -- GitLab From ddbbd3e057438d4998904e39396871317a429b6b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 22 Jun 2018 10:15:39 +0200 Subject: [PATCH 0203/1291] net: mvneta: fix the Rx desc DMA address in the Rx path [ Upstream commit 271f7ff5aa5a73488b7a9d8b84b5205fb5b2f7cc ] When using s/w buffer management, buffers are allocated and DMA mapped. When doing so on an arm64 platform, an offset correction is applied on the DMA address, before storing it in an Rx descriptor. The issue is this DMA address is then used later in the Rx path without removing the offset correction. Thus the DMA address is wrong, which can led to various issues. This patch fixes this by removing the offset correction from the DMA address retrieved from the Rx descriptor before using it in the Rx path. Fixes: 8d5047cf9ca2 ("net: mvneta: Convert to be 64 bits compatible") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d28f873169a9..3deaa3413313 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1959,7 +1959,7 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); index = rx_desc - rxq->descs; data = rxq->buf_virt_addr[index]; - phys_addr = rx_desc->buf_phys_addr; + phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction; if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { -- GitLab From 5e6b4b9b28b72acd98729ad098e720a0606de2fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Jun 2018 14:16:02 -0700 Subject: [PATCH 0204/1291] net/packet: fix use-after-free [ Upstream commit 945d015ee0c3095d2290e845565a23dedfd8027c ] We should put copy_skb in receive_queue only after a successful call to virtio_net_hdr_from_skb(). syzbot report : BUG: KASAN: use-after-free in __skb_unlink include/linux/skbuff.h:1843 [inline] BUG: KASAN: use-after-free in __skb_dequeue include/linux/skbuff.h:1863 [inline] BUG: KASAN: use-after-free in skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 Read of size 8 at addr ffff8801b044ecc0 by task syz-executor217/4553 CPU: 0 PID: 4553 Comm: syz-executor217 Not tainted 4.18.0-rc1+ #111 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 __skb_unlink include/linux/skbuff.h:1843 [inline] __skb_dequeue include/linux/skbuff.h:1863 [inline] skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 skb_queue_purge+0x26/0x40 net/core/skbuff.c:2852 packet_set_ring+0x675/0x1da0 net/packet/af_packet.c:4331 packet_release+0x630/0xd90 net/packet/af_packet.c:2991 __sock_release+0xd7/0x260 net/socket.c:603 sock_close+0x19/0x20 net/socket.c:1186 __fput+0x35b/0x8b0 fs/file_table.c:209 ____fput+0x15/0x20 fs/file_table.c:243 task_work_run+0x1ec/0x2a0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x1b08/0x2750 kernel/exit.c:865 do_group_exit+0x177/0x440 kernel/exit.c:968 __do_sys_exit_group kernel/exit.c:979 [inline] __se_sys_exit_group kernel/exit.c:977 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4448e9 Code: Bad RIP value. RSP: 002b:00007ffd5f777ca8 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004448e9 RDX: 00000000004448e9 RSI: 000000000000fcfb RDI: 0000000000000001 RBP: 00000000006cf018 R08: 00007ffd0000a45b R09: 0000000000000000 R10: 00007ffd5f777e48 R11: 0000000000000202 R12: 00000000004021f0 R13: 0000000000402280 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 4553: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 skb_clone+0x1f5/0x500 net/core/skbuff.c:1282 tpacket_rcv+0x28f7/0x3200 net/packet/af_packet.c:2221 deliver_skb net/core/dev.c:1925 [inline] deliver_ptype_list_skb net/core/dev.c:1940 [inline] __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 call_write_iter include/linux/fs.h:1795 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 vfs_write+0x1f8/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4553: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb net/core/skbuff.c:642 [inline] kfree_skb+0x1a5/0x580 net/core/skbuff.c:659 tpacket_rcv+0x189e/0x3200 net/packet/af_packet.c:2385 deliver_skb net/core/dev.c:1925 [inline] deliver_ptype_list_skb net/core/dev.c:1940 [inline] __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 call_write_iter include/linux/fs.h:1795 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 vfs_write+0x1f8/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801b044ecc0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 0 bytes inside of 232-byte region [ffff8801b044ecc0, ffff8801b044eda8) The buggy address belongs to the page: page:ffffea0006c11380 count:1 mapcount:0 mapping:ffff8801d9be96c0 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006c17988 ffff8801d9bec248 ffff8801d9be96c0 raw: 0000000000000000 ffff8801b044e040 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801b044eb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8801b044ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc >ffff8801b044ec80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8801b044ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b044ed80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4fe2e34522d6..27dafe36f29c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2303,6 +2303,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (po->stats.stats1.tp_drops) status |= TP_STATUS_LOSING; } + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; @@ -2310,15 +2317,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); - if (do_vnet) { - if (virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) { - spin_lock(&sk->sk_receive_queue.lock); - goto drop_n_account; - } - } - skb_copy_bits(skb, 0, h.raw + macoff, snaplen); if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) -- GitLab From b36f997add36c35c893c3dff7e1e01caa9541a13 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 15 Jun 2018 13:27:31 +0300 Subject: [PATCH 0205/1291] net_sched: blackhole: tell upper qdisc about dropped packets [ Upstream commit 7e85dc8cb35abf16455f1511f0670b57c1a84608 ] When blackhole is used on top of classful qdisc like hfsc it breaks qlen and backlog counters because packets are disappear without notice. In HFSC non-zero qlen while all classes are inactive triggers warning: WARNING: ... at net/sched/sch_hfsc.c:1393 hfsc_dequeue+0xba4/0xe90 [sch_hfsc] and schedules watchdog work endlessly. This patch return __NET_XMIT_BYPASS in addition to NET_XMIT_SUCCESS, this flag tells upper layer: this packet is gone and isn't queued. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_blackhole.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index c98a61e980ba..9c4c2bb547d7 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } static struct sk_buff *blackhole_dequeue(struct Qdisc *sch) -- GitLab From b3c66b54d8fef1d032580d3f7660279cdd277576 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Jun 2018 19:18:50 -0700 Subject: [PATCH 0206/1291] net: sungem: fix rx checksum support [ Upstream commit 12b03558cef6d655d0d394f5e98a6fd07c1f6c0f ] After commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), sungem owners reported the infamous "eth0: hw csum failure" message. CHECKSUM_COMPLETE has in fact never worked for this driver, but this was masked by the fact that upper stacks had to strip the FCS, and therefore skb->ip_summed was set back to CHECKSUM_NONE before my recent change. Driver configures a number of bytes to skip when the chip computes the checksum, and for some reason only half of the Ethernet header was skipped. Then a second problem is that we should strip the FCS by default, unless the driver is updated to eventually support NETIF_F_RXFCS in the future. Finally, a driver should check if NETIF_F_RXCSUM feature is enabled or not, so that the admin can turn off rx checksum if wanted. Many thanks to Andreas Schwab and Mathieu Malaterre for their help in debugging this issue. Signed-off-by: Eric Dumazet Reported-by: Meelis Roos Reported-by: Mathieu Malaterre Reported-by: Andreas Schwab Tested-by: Andreas Schwab Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/sungem.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index fa607d062cb3..15cd086e3f47 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -59,8 +59,7 @@ #include #include "sungem.h" -/* Stripping FCS is causing problems, disabled for now */ -#undef STRIP_FCS +#define STRIP_FCS #define DEFAULT_MSG (NETIF_MSG_DRV | \ NETIF_MSG_PROBE | \ @@ -434,7 +433,7 @@ static int gem_rxmac_reset(struct gem *gp) writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW); writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK); val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | - ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); + (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); writel(val, gp->regs + RXDMA_CFG); if (readl(gp->regs + GREG_BIFCFG) & GREG_BIFCFG_M66EN) writel(((5 & RXDMA_BLANK_IPKTS) | @@ -759,7 +758,6 @@ static int gem_rx(struct gem *gp, int work_to_do) struct net_device *dev = gp->dev; int entry, drops, work_done = 0; u32 done; - __sum16 csum; if (netif_msg_rx_status(gp)) printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n", @@ -854,9 +852,13 @@ static int gem_rx(struct gem *gp, int work_to_do) skb = copy_skb; } - csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); - skb->csum = csum_unfold(csum); - skb->ip_summed = CHECKSUM_COMPLETE; + if (likely(dev->features & NETIF_F_RXCSUM)) { + __sum16 csum; + + csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); + skb->csum = csum_unfold(csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } skb->protocol = eth_type_trans(skb, gp->dev); napi_gro_receive(&gp->napi, skb); @@ -1760,7 +1762,7 @@ static void gem_init_dma(struct gem *gp) writel(0, gp->regs + TXDMA_KICK); val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | - ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); + (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); writel(val, gp->regs + RXDMA_CFG); writel(desc_dma >> 32, gp->regs + RXDMA_DBHI); @@ -2986,8 +2988,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); /* We can do scatter/gather and HW checksum */ - dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM; - dev->features |= dev->hw_features | NETIF_F_RXCSUM; + dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->features = dev->hw_features; if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; -- GitLab From 35e324ebeee0fed7d7d30b6ed9873a3f06f86e94 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 18 Jun 2018 12:30:37 -0700 Subject: [PATCH 0207/1291] net/tcp: Fix socket lookups with SO_BINDTODEVICE [ Upstream commit 8c43bd1706885ba1acfa88da02bc60a2ec16f68c ] Similar to 69678bcd4d2d ("udp: fix SO_BINDTODEVICE"), TCP socket lookups need to fail if dev_match is not true. Currently, a packet to a given port can match a socket bound to device when it should not. In the VRF case, this causes the lookup to hit a VRF socket and not a global socket resulting in a response trying to go through the VRF when it should not. Fixes: 3fa6f616a7a4d ("net: ipv4: add second dif to inet socket lookups") Fixes: 4297a0ef08572 ("net: ipv6: add second dif to inet6 socket lookups") Reported-by: Lou Berger Diagnosed-by: Renato Westphal Tested-by: Renato Westphal Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_hashtables.c | 4 ++-- net/ipv6/inet6_hashtables.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e7d15fb0d94d..24b066c32e06 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -188,9 +188,9 @@ static inline int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score += 4; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b01858f5deb1..6dc93ac28261 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -113,9 +113,9 @@ static inline int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) -- GitLab From d725fde81ffc40e6e481c0705985471ee74a6a75 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:08 -0700 Subject: [PATCH 0208/1291] qede: Adverstise software timestamp caps when PHC is not available. [ Upstream commit 82a4e71b1565dea8387f54503e806cf374e779ec ] When ptp clock is not available for a PF (e.g., higher PFs in NPAR mode), get-tsinfo() callback should return the software timestamp capabilities instead of returning the error. Fixes: 4c55215c ("qede: Add driver support for PTP") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 9b2280badaf7..475f6ae5d4b3 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -337,8 +337,14 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info) { struct qede_ptp *ptp = edev->ptp; - if (!ptp) - return -EIO; + if (!ptp) { + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + + return 0; + } info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | -- GitLab From 4c2849931b239432020a7d77d955817b9b5e049b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:06 -0700 Subject: [PATCH 0209/1291] qed: Fix setting of incorrect eswitch mode. [ Upstream commit 538f8d00ba8bb417c4d9e76c61dee59d812d8287 ] By default, driver sets the eswitch mode incorrectly as VEB (virtual Ethernet bridging). Need to set VEB eswitch mode only when sriov is enabled, and it should be to set NONE by default. The patch incorporates this change. Fixes: 0fefbfbaa ("qed*: Management firmware - notifications and defaults") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 58a689fb04db..ef2374699726 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1782,7 +1782,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) DP_INFO(p_hwfn, "Failed to update driver state\n"); rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, - QED_OV_ESWITCH_VEB); + QED_OV_ESWITCH_NONE); if (rc) DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 3f40b1de7957..d08fe350ab6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4396,6 +4396,8 @@ static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn, static int qed_sriov_enable(struct qed_dev *cdev, int num) { struct qed_iov_vf_init_params params; + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i, j, rc; if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) { @@ -4408,8 +4410,8 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) /* Initialize HW for VF access */ for_each_hwfn(cdev, j) { - struct qed_hwfn *hwfn = &cdev->hwfns[j]; - struct qed_ptt *ptt = qed_ptt_acquire(hwfn); + hwfn = &cdev->hwfns[j]; + ptt = qed_ptt_acquire(hwfn); /* Make sure not to use more than 16 queues per VF */ params.num_queues = min_t(int, @@ -4445,6 +4447,19 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) goto err; } + hwfn = QED_LEADING_HWFN(cdev); + ptt = qed_ptt_acquire(hwfn); + if (!ptt) { + DP_ERR(hwfn, "Failed to acquire ptt\n"); + rc = -EBUSY; + goto err; + } + + rc = qed_mcp_ov_update_eswitch(hwfn, ptt, QED_OV_ESWITCH_VEB); + if (rc) + DP_INFO(cdev, "Failed to update eswitch mode\n"); + qed_ptt_release(hwfn, ptt); + return num; err: -- GitLab From 023a2043bc8aacbf4418d41ee849b73c83e1cd3c Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:07 -0700 Subject: [PATCH 0210/1291] qed: Fix use of incorrect size in memcpy call. [ Upstream commit cc9b27cdf7bd3c86df73439758ac1564bc8f5bbe ] Use the correct size value while copying chassis/port id values. Fixes: 6ad8c632e ("qed: Add support for query/config dcbx.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 8f6ccc0c39e5..b306961b02fd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -700,9 +700,9 @@ qed_dcbx_get_local_lldp_params(struct qed_hwfn *p_hwfn, p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE]; memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id, - ARRAY_SIZE(p_local->local_chassis_id)); + sizeof(p_local->local_chassis_id)); memcpy(params->lldp_local.local_port_id, p_local->local_port_id, - ARRAY_SIZE(p_local->local_port_id)); + sizeof(p_local->local_port_id)); } static void @@ -714,9 +714,9 @@ qed_dcbx_get_remote_lldp_params(struct qed_hwfn *p_hwfn, p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE]; memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id, - ARRAY_SIZE(p_remote->peer_chassis_id)); + sizeof(p_remote->peer_chassis_id)); memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id, - ARRAY_SIZE(p_remote->peer_port_id)); + sizeof(p_remote->peer_port_id)); } static int -- GitLab From dd537828bf733a81c2748a4ba90876b24d0720c5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:05 -0700 Subject: [PATCH 0211/1291] qed: Limit msix vectors in kdump kernel to the minimum required count. [ Upstream commit bb7858ba1102f82470a917e041fd23e6385c31be ] Memory size is limited in the kdump kernel environment. Allocation of more msix-vectors (or queues) consumes few tens of MBs of memory, which might lead to the kdump kernel failure. This patch adds changes to limit the number of MSI-X vectors in kdump kernel to minimum required value (i.e., 2 per engine). Fixes: fe56b9e6a ("qed: Add module with basic common support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 27832885a87f..2c958921dfb3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -779,6 +779,14 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, /* We want a minimum of one slowpath and one fastpath vector per hwfn */ cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; + if (is_kdump_kernel()) { + DP_INFO(cdev, + "Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n", + cdev->int_params.in.min_msix_cnt); + cdev->int_params.in.num_vectors = + cdev->int_params.in.min_msix_cnt; + } + rc = qed_set_int_mode(cdev, false); if (rc) { DP_ERR(cdev, "qed_slowpath_setup_int ERR\n"); -- GitLab From d8c1603d0bb47caafbaeaf76100842e8e0c46de8 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Sat, 23 Jun 2018 23:22:52 +0200 Subject: [PATCH 0212/1291] qmi_wwan: add support for the Dell Wireless 5821e module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7e197edd09c25774b4f12cab19f9d5462f240f4 ] This module exposes two USB configurations: a QMI+AT capable setup on USB config #1 and a MBIM capable setup on USB config #2. By default the kernel will choose the MBIM capable configuration as long as the cdc_mbim driver is available. This patch adds support for the QMI port in the secondary configuration. Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b23ee948e7c9..0db500bf86d9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1245,6 +1245,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ -- GitLab From 5e90946baa576df0c28b74cbde16db236f2afd79 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 25 Jun 2018 09:26:27 +0200 Subject: [PATCH 0213/1291] r8152: napi hangup fix after disconnect [ Upstream commit 0ee1f4734967af8321ecebaf9c74221ace34f2d5 ] When unplugging an r8152 adapter while the interface is UP, the NIC becomes unusable. usb->disconnect (aka rtl8152_disconnect) deletes napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to napi_disable, but the napi is already deleted by disconnect above. So the first while loop in napi_disable never finishes. This results in complete deadlock of the network layer as there is rtnl_mutex held by unregister_netdev. So avoid the call to napi_disable in rtl8152_close when the device is already gone. The other calls to usb_kill_urb, cancel_delayed_work_sync, netif_stop_queue etc. seem to be fine. The urb and netdev is not destroyed yet. Signed-off-by: Jiri Slaby Cc: linux-usb@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aa88b640cb6c..0fa64cc1a011 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3959,7 +3959,8 @@ static int rtl8152_close(struct net_device *netdev) #ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&tp->pm_notifier); #endif - napi_disable(&tp->napi); + if (!test_bit(RTL8152_UNPLUG, &tp->flags)) + napi_disable(&tp->napi); clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); -- GitLab From 32761addd300814d4d8e76a9db1587b1b74e1cf8 Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Sun, 17 Jun 2018 20:02:05 +0530 Subject: [PATCH 0214/1291] stmmac: fix DMA channel hang in half-duplex mode [ Upstream commit b6cfffa7ad923c73f317ea50fd4ebcb3b4b6669c ] HW does not support Half-duplex mode in multi-queue scenario. Fix it by not advertising the Half-Duplex mode if multi-queue enabled. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9866d2e34cdd..27f2e650e27b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -914,6 +914,7 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + u32 tx_cnt = priv->plat->tx_queues_to_use; struct phy_device *phydev; char phy_id_fmt[MII_BUS_ID_SIZE + 3]; char bus_id[MII_BUS_ID_SIZE]; @@ -954,6 +955,15 @@ static int stmmac_init_phy(struct net_device *dev) phydev->advertising &= ~(SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full); + /* + * Half-duplex mode not supported with multiqueue + * half-duplex can only works with single queue + */ + if (tx_cnt > 1) + phydev->supported &= ~(SUPPORTED_1000baseT_Half | + SUPPORTED_100baseT_Half | + SUPPORTED_10baseT_Half); + /* * Broken HW is sometimes missing the pull-up resistor on the * MDIO line, which results in reads to non-existent devices returning -- GitLab From 4f5f7bce308eebcfc8cf63134d4147ccfa8ccb32 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Tue, 26 Jun 2018 18:33:33 -0700 Subject: [PATCH 0215/1291] strparser: Remove early eaten to fix full tcp receive buffer stall [ Upstream commit 977c7114ebda2e746a114840d3a875e0cdb826fb ] On receving an incomplete message, the existing code stores the remaining length of the cloned skb in the early_eaten field instead of incrementing the value returned by __strp_recv. This defers invocation of sock_rfree for the current skb until the next invocation of __strp_recv, which returns early_eaten if early_eaten is non-zero. This behavior causes a stall when the current message occupies the very tail end of a massive skb, and strp_peek/need_bytes indicates that the remainder of the current message has yet to arrive on the socket. The TCP receive buffer is totally full, causing the TCP window to go to zero, so the remainder of the message will never arrive. Incrementing the value returned by __strp_recv by the amount otherwise stored in early_eaten prevents stalls of this nature. Signed-off-by: Doron Roberts-Kedes Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/strparser/strparser.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index c741365f77da..a68c754e84ea 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -35,7 +35,6 @@ struct _strp_msg { */ struct strp_msg strp; int accum_len; - int early_eaten; }; static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) @@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, head = strp->skb_head; if (head) { /* Message already in progress */ - - stm = _strp_msg(head); - if (unlikely(stm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= stm->early_eaten ? - orig_len : stm->early_eaten; - stm->early_eaten -= eaten; - - return eaten; - } - if (unlikely(orig_offset)) { /* Getting data with a non-zero offset when a message is * in progress is not expected. If it does happen, we @@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, } stm->accum_len += cand_len; + eaten += cand_len; strp->need_bytes = stm->strp.full_len - stm->accum_len; - stm->early_eaten = cand_len; STRP_STATS_ADD(strp->stats.bytes, cand_len); desc->count = 0; /* Stop reading socket */ break; -- GitLab From 3373d6d056d7b9d1de397498389891b8b95dfd8a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 27 Jun 2018 16:04:48 -0700 Subject: [PATCH 0216/1291] tcp: fix Fast Open key endianness [ Upstream commit c860e997e9170a6d68f9d1e6e2cf61f572191aaf ] Fast Open key could be stored in different endian based on the CPU. Previously hosts in different endianness in a server farm using the same key config (sysctl value) would produce different cookies. This patch fixes it by always storing it as little endian to keep same API for LE hosts. Reported-by: Daniele Iamartino Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0989e739d098..5a29dc5083a3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -258,8 +258,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, { struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; - int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ + __le32 key[4]; + int ret, i; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) @@ -268,11 +269,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, rcu_read_lock(); ctxt = rcu_dereference(tcp_fastopen_ctx); if (ctxt) - memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); else - memset(user_key, 0, sizeof(user_key)); + memset(key, 0, sizeof(key)); rcu_read_unlock(); + for (i = 0; i < ARRAY_SIZE(key); i++) + user_key[i] = le32_to_cpu(key[i]); + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", user_key[0], user_key[1], user_key[2], user_key[3]); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); @@ -288,12 +292,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, * first invocation of tcp_fastopen_cookie_gen */ tcp_fastopen_init_key_once(false); - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + + for (i = 0; i < ARRAY_SIZE(user_key); i++) + key[i] = cpu_to_le32(user_key[i]); + + tcp_fastopen_reset_cipher(key, TCP_FASTOPEN_KEY_LENGTH); } bad_key: pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", - user_key[0], user_key[1], user_key[2], user_key[3], + user_key[0], user_key[1], user_key[2], user_key[3], (char *)tbl.data, ret); kfree(tbl.data); return ret; -- GitLab From 2dc4696ee6d9471ce110157fb695f75b8fd912e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Jun 2018 13:07:53 +0300 Subject: [PATCH 0217/1291] tcp: prevent bogus FRTO undos with non-SACK flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1236f22fbae15df3736ab4a984c64c0c6ee6254c ] If SACK is not enabled and the first cumulative ACK after the RTO retransmission covers more than the retransmitted skb, a spurious FRTO undo will trigger (assuming FRTO is enabled for that RTO). The reason is that any non-retransmitted segment acknowledged will set FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is no indication that it would have been delivered for real (the scoreboard is not kept with TCPCB_SACKED_ACKED bits in the non-SACK case so the check for that bit won't help like it does with SACK). Having FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo in tcp_process_loss. We need to use more strict condition for non-SACK case and check that none of the cumulatively ACKed segments were retransmitted to prove that progress is due to original transmissions. Only then keep FLAG_ORIG_SACK_ACKED set, allowing FRTO undo to proceed in non-SACK case. (FLAG_ORIG_SACK_ACKED is planned to be renamed to FLAG_ORIG_PROGRESS to better indicate its purpose but to keep this change minimal, it will be done in another patch). Besides burstiness and congestion control violations, this problem can result in RTO loop: When the loss recovery is prematurely undoed, only new data will be transmitted (if available) and the next retransmission can occur only after a new RTO which in case of multiple losses (that are not for consecutive packets) requires one RTO per loss to recover. Signed-off-by: Ilpo Järvinen Tested-by: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f0caff3139ed..5711b1b12d28 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3194,6 +3194,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (tcp_is_reno(tp)) { tcp_remove_reno_sacks(sk, pkts_acked); + + /* If any of the cumulatively ACKed segments was + * retransmitted, non-SACK case cannot confirm that + * progress was due to original transmission due to + * lack of TCPCB_SACKED_ACKED bits even if some of + * the packets may have been never retransmitted. + */ + if (flag & FLAG_RETRANS_DATA_ACKED) + flag &= ~FLAG_ORIG_SACK_ACKED; } else { int delta; -- GitLab From 7eba6537c3d17ca582d4b3ab38af4463020563d9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Jun 2018 13:11:31 +0800 Subject: [PATCH 0218/1291] vhost_net: validate sock before trying to put its fd [ Upstream commit b8f1f65882f07913157c44673af7ec0b308d03eb ] Sock will be NULL if we pass -1 to vhost_net_set_backend(), but when we meet errors during ubuf allocation, the code does not check for NULL before calling sockfd_put(), this will lead NULL dereferencing. Fixing by checking sock pointer before. Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") Reported-by: Dan Carpenter Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index b0d606b2d06c..6123b4dd8638 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1186,7 +1186,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: - sockfd_put(sock); + if (sock) + sockfd_put(sock); err_vq: mutex_unlock(&vq->mutex); err: -- GitLab From d8530e891edde17506727af174d66d3f0b4e6b34 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 20 Jun 2018 15:51:51 +0200 Subject: [PATCH 0219/1291] VSOCK: fix loopback on big-endian systems [ Upstream commit e5ab564c9ebee77794842ca7d7476147b83d6a27 ] The dst_cid and src_cid are 64 bits, therefore 64 bit accessors should be used, and in fact in virtio_transport_common.c only 64 bit accessors are used. Using 32 bit accessors for 64 bit values breaks big endian systems. This patch fixes a wrong use of le32_to_cpu in virtio_transport_send_pkt. Fixes: b9116823189e85ccf384 ("VSOCK: add loopback to virtio_transport") Signed-off-by: Claudio Imbrenda Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 403d86e80162..fdb294441682 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -201,7 +201,7 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) return -ENODEV; } - if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) + if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) return virtio_transport_send_pkt_loopback(vsock, pkt); if (pkt->reply) -- GitLab From 3caea5150c15c4258a8d9ee0c5758d53d4753009 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Jul 2018 20:59:58 -0500 Subject: [PATCH 0220/1291] net: cxgb3_main: fix potential Spectre v1 commit 676bcfece19f83621e905aa55b5ed2d45cc4f2d3 upstream. t.qset_idx can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c:2286 cxgb_extension_ioctl() warn: potential spectre issue 'adapter->msix_info' Fix this by sanitizing t.qset_idx before using it to index adapter->msix_info Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 6a015362c340..bf291e90cdb0 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "common.h" #include "cxgb3_ioctl.h" @@ -2268,6 +2269,7 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (t.qset_idx >= nqsets) return -EINVAL; + t.qset_idx = array_index_nospec(t.qset_idx, nqsets); q = &adapter->params.sge.qset[q1 + t.qset_idx]; t.rspq_size = q->rspq_size; -- GitLab From ee8d2e719c1e5ade96524d4480cffc17f8c756a8 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 22 Jun 2018 13:31:57 +0800 Subject: [PATCH 0221/1291] rtlwifi: Fix kernel Oops "Fw download fail!!" commit 12dfa2f68ab659636e092db13b5d17cf9aac82af upstream. When connecting to AP, mac80211 asks driver to enter and leave PS quickly, but driver deinit doesn't wait for delayed work complete when entering PS, then driver reinit procedure and delay work are running simultaneously. This will cause unpredictable kernel oops or crash like rtl8723be: error H2C cmd because of Fw download fail!!! WARNING: CPU: 3 PID: 159 at drivers/net/wireless/realtek/rtlwifi/ rtl8723be/fw.c:227 rtl8723be_fill_h2c_cmd+0x182/0x510 [rtl8723be] CPU: 3 PID: 159 Comm: kworker/3:2 Tainted: G O 4.16.13-2-ARCH #1 Hardware name: ASUSTeK COMPUTER INC. X556UF/X556UF, BIOS X556UF.406 10/21/2016 Workqueue: rtl8723be_pci rtl_c2hcmd_wq_callback [rtlwifi] RIP: 0010:rtl8723be_fill_h2c_cmd+0x182/0x510 [rtl8723be] RSP: 0018:ffffa6ab01e1bd70 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffa26069071520 RCX: 0000000000000001 RDX: 0000000080000001 RSI: ffffffff8be70e9c RDI: 00000000ffffffff RBP: 0000000000000000 R08: 0000000000000048 R09: 0000000000000348 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffffa26069071520 R14: 0000000000000000 R15: ffffa2607d205f70 FS: 0000000000000000(0000) GS:ffffa26081d80000(0000) knlGS:000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000443b39d3000 CR3: 000000037700a005 CR4: 00000000003606e0 Call Trace: ? halbtc_send_bt_mp_operation.constprop.17+0xd5/0xe0 [btcoexist] ? ex_btc8723b1ant_bt_info_notify+0x3b8/0x820 [btcoexist] ? rtl_c2hcmd_launcher+0xab/0x110 [rtlwifi] ? process_one_work+0x1d1/0x3b0 ? worker_thread+0x2b/0x3d0 ? process_one_work+0x3b0/0x3b0 ? kthread+0x112/0x130 ? kthread_create_on_node+0x60/0x60 ? ret_from_fork+0x35/0x40 Code: 00 76 b4 e9 e2 fe ff ff 4c 89 ee 4c 89 e7 e8 56 22 86 ca e9 5e ... This patch ensures all delayed works done before entering PS to satisfy our expectation, so use cancel_delayed_work_sync() instead. An exception is delayed work ips_nic_off_wq because running task may be itself, so add a parameter ips_wq to deinit function to handle this case. This issue is reported and fixed in below threads: https://github.com/lwfinger/rtlwifi_new/issues/367 https://github.com/lwfinger/rtlwifi_new/issues/366 Tested-by: Evgeny Kapun # 8723DE Tested-by: Shivam Kakkar # 8723BE on 4.18-rc1 Signed-off-by: Ping-Ke Shih Fixes: cceb0a597320 ("rtlwifi: Add work queue for c2h cmd.") Cc: Stable # 4.11+ Reviewed-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 17 ++++++++++------- drivers/net/wireless/realtek/rtlwifi/base.h | 2 +- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- drivers/net/wireless/realtek/rtlwifi/pci.c | 2 +- drivers/net/wireless/realtek/rtlwifi/ps.c | 4 ++-- drivers/net/wireless/realtek/rtlwifi/usb.c | 2 +- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 93256f8bc0b5..ec82c1c3f12e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -483,18 +483,21 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) } -void rtl_deinit_deferred_work(struct ieee80211_hw *hw) +void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq) { struct rtl_priv *rtlpriv = rtl_priv(hw); del_timer_sync(&rtlpriv->works.watchdog_timer); - cancel_delayed_work(&rtlpriv->works.watchdog_wq); - cancel_delayed_work(&rtlpriv->works.ips_nic_off_wq); - cancel_delayed_work(&rtlpriv->works.ps_work); - cancel_delayed_work(&rtlpriv->works.ps_rfon_wq); - cancel_delayed_work(&rtlpriv->works.fwevt_wq); - cancel_delayed_work(&rtlpriv->works.c2hcmd_wq); + cancel_delayed_work_sync(&rtlpriv->works.watchdog_wq); + if (ips_wq) + cancel_delayed_work(&rtlpriv->works.ips_nic_off_wq); + else + cancel_delayed_work_sync(&rtlpriv->works.ips_nic_off_wq); + cancel_delayed_work_sync(&rtlpriv->works.ps_work); + cancel_delayed_work_sync(&rtlpriv->works.ps_rfon_wq); + cancel_delayed_work_sync(&rtlpriv->works.fwevt_wq); + cancel_delayed_work_sync(&rtlpriv->works.c2hcmd_wq); } EXPORT_SYMBOL_GPL(rtl_deinit_deferred_work); diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h index b56d1b7f5567..cbbb5be36a09 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.h +++ b/drivers/net/wireless/realtek/rtlwifi/base.h @@ -121,7 +121,7 @@ void rtl_init_rfkill(struct ieee80211_hw *hw); void rtl_deinit_rfkill(struct ieee80211_hw *hw); void rtl_watch_dog_timer_callback(unsigned long data); -void rtl_deinit_deferred_work(struct ieee80211_hw *hw); +void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq); bool rtl_action_proc(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx); int rtlwifi_rate_mapping(struct ieee80211_hw *hw, bool isht, diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index c53cbf3d52bd..93f1779ea23e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -196,7 +196,7 @@ static void rtl_op_stop(struct ieee80211_hw *hw) /* reset sec info */ rtl_cam_reset_sec_info(hw); - rtl_deinit_deferred_work(hw); + rtl_deinit_deferred_work(hw, false); } rtlpriv->intf_ops->adapter_stop(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d7331225c5f3..457a0f725c8a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -2359,7 +2359,7 @@ void rtl_pci_disconnect(struct pci_dev *pdev) ieee80211_unregister_hw(hw); rtlmac->mac80211_registered = 0; } else { - rtl_deinit_deferred_work(hw); + rtl_deinit_deferred_work(hw, false); rtlpriv->intf_ops->adapter_stop(hw); } rtlpriv->cfg->ops->disable_interrupt(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 07ee3096f50e..f6d00613c53d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -66,7 +66,7 @@ bool rtl_ps_disable_nic(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); /*<1> Stop all timer */ - rtl_deinit_deferred_work(hw); + rtl_deinit_deferred_work(hw, true); /*<2> Disable Interrupt */ rtlpriv->cfg->ops->disable_interrupt(hw); @@ -287,7 +287,7 @@ void rtl_ips_nic_on(struct ieee80211_hw *hw) struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); enum rf_pwrstate rtstate; - cancel_delayed_work(&rtlpriv->works.ips_nic_off_wq); + cancel_delayed_work_sync(&rtlpriv->works.ips_nic_off_wq); spin_lock(&rtlpriv->locks.ips_lock); if (ppsc->inactiveps) { diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 5590d07d0918..820c42ff5384 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1150,7 +1150,7 @@ void rtl_usb_disconnect(struct usb_interface *intf) ieee80211_unregister_hw(hw); rtlmac->mac80211_registered = 0; } else { - rtl_deinit_deferred_work(hw); + rtl_deinit_deferred_work(hw, false); rtlpriv->intf_ops->adapter_stop(hw); } /*deinit rfkill */ -- GitLab From 12c0949a07458103b539eaba2330763873ef4cb5 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 28 Jun 2018 10:02:27 +0800 Subject: [PATCH 0222/1291] rtlwifi: rtl8821ae: fix firmware is not ready to run commit 9a98302de19991d51e067b88750585203b2a3ab6 upstream. Without this patch, firmware will not run properly on rtl8821ae, and it causes bad user experience. For example, bad connection performance with low rate, higher power consumption, and so on. rtl8821ae uses two kinds of firmwares for normal and WoWlan cases, and each firmware has firmware data buffer and size individually. Original code always overwrite size of normal firmware rtlpriv->rtlhal.fwsize, and this mismatch causes firmware checksum error, then firmware can't start. In this situation, driver gives message "Firmware is not ready to run!". Fixes: fe89707f0afa ("rtlwifi: rtl8821ae: Simplify loading of WOWLAN firmware") Signed-off-by: Ping-Ke Shih Cc: Stable # 4.0+ Reviewed-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 93f1779ea23e..b01123138797 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -130,7 +130,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, firmware->size); rtlpriv->rtlhal.wowlan_fwsize = firmware->size; } - rtlpriv->rtlhal.fwsize = firmware->size; release_firmware(firmware); } -- GitLab From 2be27d444f61c5542df5c00892817124582428a4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 15 Jul 2018 21:53:20 +0200 Subject: [PATCH 0223/1291] net: lan78xx: Fix race in tx pending skb size calculation commit dea39aca1d7aef1e2b95b07edeacf04cc8863a2e upstream. The skb size calculation in lan78xx_tx_bh is in race with the start_xmit, which could lead to rare kernel oopses. So protect the whole skb walk with a spin lock. As a benefit we can unlink the skb directly. This patch was tested on Raspberry Pi 3B+ Link: https://github.com/raspberrypi/linux/issues/2608 Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet") Cc: stable Signed-off-by: Floris Bos Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9881edc568ba..0aa91ab9a0fb 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3197,6 +3197,7 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) pkt_cnt = 0; count = 0; length = 0; + spin_lock_irqsave(&tqp->lock, flags); for (skb = tqp->next; pkt_cnt < tqp->qlen; skb = skb->next) { if (skb_is_gso(skb)) { if (pkt_cnt) { @@ -3205,7 +3206,8 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) } count = 1; length = skb->len - TX_OVERHEAD; - skb2 = skb_dequeue(tqp); + __skb_unlink(skb, tqp); + spin_unlock_irqrestore(&tqp->lock, flags); goto gso_skb; } @@ -3214,6 +3216,7 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) skb_totallen = skb->len + roundup(skb_totallen, sizeof(u32)); pkt_cnt++; } + spin_unlock_irqrestore(&tqp->lock, flags); /* copy to a single skb */ skb = alloc_skb(skb_totallen, GFP_ATOMIC); -- GitLab From 67f7c68a9085957a291e91ee5a177f2b208aeda6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 14 May 2018 11:57:23 +0300 Subject: [PATCH 0224/1291] xhci: Fix USB3 NULL pointer dereference at logical disconnect. commit 2278446e2b7cd33ad894b32e7eb63afc7db6c86e upstream. Hub driver will try to disable a USB3 device twice at logical disconnect, racing with xhci_free_dev() callback from the first port disable. This can be triggered with "udisksctl power-off --block-device " or by writing "1" to the "remove" sysfs file for a USB3 device in 4.17-rc4. USB3 devices don't have a similar disabled link state as USB2 devices, and use a U3 suspended link state instead. In this state the port is still enabled and connected. hub_port_connect() first disconnects the device, then later it notices that device is still enabled (due to U3 states) it will try to disable the port again (set to U3). The xhci_free_dev() called during device disable is async, so checking for existing xhci->devs[i] when setting link state to U3 the second time was successful, even if device was being freed. The regression was caused by, and whole thing revealed by, Commit 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") which sets xhci->devs[i]->udev to NULL before xhci_virt_dev() returned. and causes a NULL pointer dereference the second time we try to set U3. Fix this by checking xhci->devs[i]->udev exists before setting link state. The original patch went to stable so this fix needs to be applied there as well. Fixes: 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") Cc: Reported-by: Jordan Glover Tested-by: Jordan Glover Signed-off-by: Mathias Nyman Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 00b8d4cdcac3..c01d1f3a1c7d 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -366,7 +366,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci, slot_id = 0; for (i = 0; i < MAX_HC_SLOTS; i++) { - if (!xhci->devs[i]) + if (!xhci->devs[i] || !xhci->devs[i]->udev) continue; speed = xhci->devs[i]->udev->speed; if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3)) -- GitLab From a406abeb7416501355668222164919836da67034 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 6 Mar 2018 08:57:57 -0500 Subject: [PATCH 0225/1291] media: rc: oops in ir_timer_keyup after device unplug commit 8d4068810d9926250dd2435719a080b889eb44c3 upstream. If there is IR in the raw kfifo when ir_raw_event_unregister() is called, then kthread_stop() causes ir_raw_event_thread to be scheduled, decode some scancodes and re-arm timer_keyup. The timer_keyup then fires when the rc device is long gone. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/rc-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 72f381522cb2..a22828713c1c 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1824,11 +1824,11 @@ void rc_unregister_device(struct rc_dev *dev) if (!dev) return; - del_timer_sync(&dev->timer_keyup); - if (dev->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(dev); + del_timer_sync(&dev->timer_keyup); + rc_free_rx_device(dev); device_del(&dev->dev); -- GitLab From 16b3ae12337eb1695de0031fe4e030fb7fd6c2d9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Apr 2018 12:00:11 +0200 Subject: [PATCH 0226/1291] clocksource: Initialize cs->wd_list commit 5b9e886a4af97574ca3ce1147f35545da0e7afc7 upstream. A number of places relies on list_empty(&cs->wd_list), however the list_head does not get initialized. Do so upon registration, such that thereafter it is possible to rely on list_empty() correctly reflecting the list membership status. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Diego Viola Reviewed-by: Rafael J. Wysocki Cc: stable@vger.kernel.org Cc: len.brown@intel.com Cc: rjw@rjwysocki.net Cc: rui.zhang@intel.com Link: https://lkml.kernel.org/r/20180430100344.472662715@infradead.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/time/clocksource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 03918a19cf2d..3b71d859ee38 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -322,6 +322,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) { unsigned long flags; + INIT_LIST_HEAD(&cs->wd_list); + spin_lock_irqsave(&watchdog_lock, flags); if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { /* cs is a clocksource to be watched. */ -- GitLab From c4bfed85bae8eba6cce22f9b9cce530720527411 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Sat, 7 Jul 2018 20:41:47 +0200 Subject: [PATCH 0227/1291] crypto: af_alg - Initialize sg_num_bytes in error code path commit 2546da99212f22034aecf279da9c47cbfac6c981 upstream. The RX SGL in processing is already registered with the RX SGL tracking list to support proper cleanup. The cleanup code path uses the sg_num_bytes variable which must therefore be always initialized, even in the error code path. Signed-off-by: Stephan Mueller Reported-by: syzbot+9c251bdd09f83b92ba95@syzkaller.appspotmail.com #syz test: https://github.com/google/kmsan.git master CC: #4.14 Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 815ee1075574..42dfdd1fd6d8 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1183,8 +1183,10 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, /* make one iovec available as scatterlist */ err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen); - if (err < 0) + if (err < 0) { + rsgl->sg_num_bytes = 0; return err; + } /* chain the new scatterlist with previous one */ if (areq->last_rsgl) -- GitLab From f1059632a4fcb0a73b1c315082cdb623a3b3ef52 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Jun 2018 01:06:34 +0900 Subject: [PATCH 0228/1291] mtd: rawnand: denali_dt: set clk_x_rate to 200 MHz unconditionally commit 3f6e6986045d47f87bd982910821b7ab9758487e upstream. Since commit 1bb88666775e ("mtd: nand: denali: handle timing parameters by setup_data_interface()"), denali_dt.c gets the clock rate from the clock driver. The driver expects the frequency of the bus interface clock, whereas the clock driver of SOCFPGA provides the core clock. Thus, the setup_data_interface() hook calculates timing parameters based on a wrong frequency. To make it work without relying on the clock driver, hard-code the clock frequency, 200MHz. This is fine for existing DT of UniPhier, and also fixes the issue of SOCFPGA because both platforms use 200 MHz for the bus interface clock. Fixes: 1bb88666775e ("mtd: nand: denali: handle timing parameters by setup_data_interface()") Cc: linux-stable #4.14+ Reported-by: Philipp Rosenberger Suggested-by: Boris Brezillon Signed-off-by: Masahiro Yamada Tested-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/denali_dt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c index 56e2e177644d..3f4f4aea0e8b 100644 --- a/drivers/mtd/nand/denali_dt.c +++ b/drivers/mtd/nand/denali_dt.c @@ -122,7 +122,11 @@ static int denali_dt_probe(struct platform_device *pdev) if (ret) return ret; - denali->clk_x_rate = clk_get_rate(dt->clk); + /* + * Hardcode the clock rate for the backward compatibility. + * This works for both SOCFPGA and UniPhier. + */ + denali->clk_x_rate = 200000000; ret = denali_init(denali); if (ret) -- GitLab From aa6be396714c79c44d5fa6b4ae09c8090fef8727 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 12 Apr 2018 19:11:58 +0100 Subject: [PATCH 0229/1291] block: do not use interruptible wait anywhere commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream. When blk_queue_enter() waits for a queue to unfreeze, or unset the PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI device is resumed asynchronously, i.e. after un-freezing userspace tasks. So that commit exposed the bug as a regression in v4.15. A mysterious SIGBUS (or -EIO) sometimes happened during the time the device was being resumed. Most frequently, there was no kernel log message, and we saw Xorg or Xwayland killed by SIGBUS.[1] [1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 Without this fix, I get an IO error in this test: # dd if=/dev/sda of=/dev/null iflag=direct & \ while killall -SIGUSR1 dd; do sleep 0.1; done & \ echo mem > /sys/power/state ; \ sleep 5; killall dd # stop after 5 seconds The interruptible wait was added to blk_queue_enter in commit 3ef28e83ab15 ("block: generic request_queue reference counting"). Before then, the interruptible wait was only in blk-mq, but I don't think it could ever have been correct. Reviewed-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Alan Jenkins Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6f6e21821d2d..68bae6338ad4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -779,7 +779,6 @@ EXPORT_SYMBOL(blk_alloc_queue); int blk_queue_enter(struct request_queue *q, bool nowait) { while (true) { - int ret; if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; @@ -796,13 +795,11 @@ int blk_queue_enter(struct request_queue *q, bool nowait) */ smp_rmb(); - ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } -- GitLab From b5199c61e95c58fdfd9478bcbf368f1575d61da1 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 9 Jul 2018 13:16:07 -0500 Subject: [PATCH 0230/1291] PCI: hv: Disable/enable IRQs rather than BH in hv_compose_msi_msg() commit 35a88a18d7ea58600e11590405bc93b08e16e7f5 upstream. Commit de0aa7b2f97d ("PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()") uses local_bh_disable()/enable(), because hv_pci_onchannelcallback() can also run in tasklet context as the channel event callback, so bottom halves should be disabled to prevent a race condition. With CONFIG_PROVE_LOCKING=y in the recent mainline, or old kernels that don't have commit f71b74bca637 ("irq/softirqs: Use lockdep to assert IRQs are disabled/enabled"), when the upper layer IRQ code calls hv_compose_msi_msg() with local IRQs disabled, we'll see a warning at the beginning of __local_bh_enable_ip(): IRQs not enabled as expected WARNING: CPU: 0 PID: 408 at kernel/softirq.c:162 __local_bh_enable_ip The warning exposes an issue in de0aa7b2f97d: local_bh_enable() can potentially call do_softirq(), which is not supposed to run when local IRQs are disabled. Let's fix this by using local_irq_save()/restore() instead. Note: hv_pci_onchannelcallback() is not a hot path because it's only called when the PCI device is hot added and removed, which is infrequent. Fixes: de0aa7b2f97d ("PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()") Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Haiyang Zhang Cc: stable@vger.kernel.org Cc: Stephen Hemminger Cc: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index caea7c618207..4523d7e1bcb9 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -1091,6 +1091,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_bus *pbus; struct pci_dev *pdev; struct cpumask *dest; + unsigned long flags; struct compose_comp_ctxt comp; struct tran_int_desc *int_desc; struct { @@ -1182,14 +1183,15 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * the channel callback directly when channel->target_cpu is * the current CPU. When the higher level interrupt code * calls us with interrupt enabled, let's add the - * local_bh_disable()/enable() to avoid race. + * local_irq_save()/restore() to avoid race: + * hv_pci_onchannelcallback() can also run in tasklet. */ - local_bh_disable(); + local_irq_save(flags); if (hbus->hdev->channel->target_cpu == smp_processor_id()) hv_pci_onchannelcallback(hbus); - local_bh_enable(); + local_irq_restore(flags); if (hpdev->state == hv_pcichild_ejecting) { dev_err_once(&hbus->hdev->device, -- GitLab From 766a7ad6639b4b2026d19cb217ada2ed8a2cfe57 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jun 2018 12:14:56 +0200 Subject: [PATCH 0231/1291] netfilter: ebtables: reject non-bridge targets commit 11ff7288beb2b7da889a014aff0a7b80bf8efcf3 upstream. the ebtables evaluation loop expects targets to return positive values (jumps), or negative values (absolute verdicts). This is completely different from what xtables does. In xtables, targets are expected to return the standard netfilter verdicts, i.e. NF_DROP, NF_ACCEPT, etc. ebtables will consider these as jumps. Therefore reject any target found due to unspec fallback. v2: also reject watchers. ebtables ignores their return value, so a target that assumes skb ownership (and returns NF_STOLEN) causes use-after-free. The only watchers in the 'ebtables' front-end are log and nflog; both have AF_BRIDGE specific wrappers on kernel side. Reported-by: syzbot+2b43f681169a2a0d306a@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 25738b20676d..54c7fe68040f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -398,6 +398,12 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); if (IS_ERR(watcher)) return PTR_ERR(watcher); + + if (watcher->family != NFPROTO_BRIDGE) { + module_put(watcher->me); + return -ENOENT; + } + w->u.watcher = watcher; par->target = watcher; @@ -719,6 +725,13 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, goto cleanup_watchers; } + /* Reject UNSPEC, xtables verdicts/return values are incompatible */ + if (target->family != NFPROTO_BRIDGE) { + module_put(target->me); + ret = -ENOENT; + goto cleanup_watchers; + } + t->u.target = target; if (t->u.target == &ebt_standard_target) { if (gap < sizeof(struct ebt_standard_target)) { -- GitLab From cba5008502f2238b716334d7cb6d847560ce01b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 13 Jul 2018 16:59:27 -0700 Subject: [PATCH 0232/1291] reiserfs: fix buffer overflow with long warning messages commit fe10e398e860955bac4d28ec031b701d358465e4 upstream. ReiserFS prepares log messages into a 1024-byte buffer with no bounds checks. Long messages, such as the "unknown mount option" warning when userspace passes a crafted mount options string, overflow this buffer. This causes KASAN to report a global-out-of-bounds write. Fix it by truncating messages to the buffer size. Link: http://lkml.kernel.org/r/20180707203621.30922-1-ebiggers3@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+b890b3335a4d8c608963@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/prints.c | 141 +++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 64f49cafbc5b..cfb0c9ac2de4 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key) } /* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) +static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } /* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) +static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, + reiserfs_cpu_offset(key), cpu_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +static int scnprintf_de_head(char *buf, size_t size, + struct reiserfs_de_head *deh) { if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); + return scnprintf(buf, size, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), + deh_state(deh)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_item_head(char *buf, struct item_head *ih) +static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + char *p = buf; + char * const end = buf + size; + + p += scnprintf(p, end - p, "%s", + (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + + p += scnprintf_le_key(p, end - p, &ih->ih_key); + + p += scnprintf(p, end - p, + ", item_len %d, item_location %d, free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), + ih_free_space(ih)); + return p - buf; } else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +static int scnprintf_direntry(char *buf, size_t size, + struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + return scnprintf(buf, size, "\"%s\"==>[%d %d]", + name, de->de_dir_id, de->de_objectid); } -static void sprintf_block_head(char *buf, struct buffer_head *bh) +static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); + return scnprintf(buf, size, + "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) +static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); + return scnprintf(buf, size, + "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bh->b_bdev, bh->b_size, + (unsigned long long)bh->b_blocknr, + atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } -static void sprintf_disk_child(char *buf, struct disk_child *dc) +static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); + return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) @@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args) char *fmt1 = fmt_buf; char *k; char *p = error_buf; + char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); - strcpy(fmt1, fmt); + if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { + strscpy(error_buf, "format string too long", end - error_buf); + goto out_unlock; + } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; - p += vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + p += scnprintf_le_key(p, end - p, + va_arg(args, struct reiserfs_key *)); break; case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + p += scnprintf_cpu_key(p, end - p, + va_arg(args, struct cpu_key *)); break; case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); + p += scnprintf_item_head(p, end - p, + va_arg(args, struct item_head *)); break; case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); + p += scnprintf_direntry(p, end - p, + va_arg(args, struct reiserfs_dir_entry *)); break; case 'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); + p += scnprintf_disk_child(p, end - p, + va_arg(args, struct disk_child *)); break; case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_block_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_buffer_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); + p += scnprintf_de_head(p, end - p, + va_arg(args, struct reiserfs_de_head *)); break; } - p += strlen(p); fmt1 = k + 2; } - vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); +out_unlock: spin_unlock(&error_lock); } -- GitLab From d9bb71d76c07d287f3904c463d1234e6b7b8049d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 11 Jul 2018 10:46:29 -0700 Subject: [PATCH 0233/1291] KEYS: DNS: fix parsing multiple options commit c604cb767049b78b3075497b80ebb8fd530ea2cc upstream. My recent fix for dns_resolver_preparse() printing very long strings was incomplete, as shown by syzbot which still managed to hit the WARN_ONCE() in set_precision() by adding a crafted "dns_resolver" key: precision 50001 too large WARNING: CPU: 7 PID: 864 at lib/vsprintf.c:2164 vsnprintf+0x48a/0x5a0 The bug this time isn't just a printing bug, but also a logical error when multiple options ("#"-separated strings) are given in the key payload. Specifically, when separating an option string into name and value, if there is no value then the name is incorrectly considered to end at the end of the key payload, rather than the end of the current option. This bypasses validation of the option length, and also means that specifying multiple options is broken -- which presumably has gone unnoticed as there is currently only one valid option anyway. A similar problem also applied to option values, as the kstrtoul() when parsing the "dnserror" option will read past the end of the current option and into the next option. Fix these bugs by correctly computing the length of the option name and by copying the option value, null-terminated, into a temporary buffer. Reproducer for the WARN_ONCE() that syzbot hit: perl -e 'print "#A#", "\0" x 50000' | keyctl padd dns_resolver desc @s Reproducer for "dnserror" option being parsed incorrectly (expected behavior is to fail when seeing the unknown option "foo", actual behavior was to read the dnserror value as "1#foo" and fail there): perl -e 'print "#dnserror=1#foo\0"' | keyctl padd dns_resolver desc @s Reported-by: syzbot Fixes: 4a2d789267e0 ("DNS: If the DNS server returns an error, allow that to be cached [ver #2]") Signed-off-by: Eric Biggers Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dns_resolver/dns_key.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index f0252768ecf4..5f5d9eafccf5 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -87,35 +87,39 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) opt++; kdebug("options: '%s'", opt); do { + int opt_len, opt_nlen; const char *eq; - int opt_len, opt_nlen, opt_vlen, tmp; + char optval[128]; next_opt = memchr(opt, '#', end - opt) ?: end; opt_len = next_opt - opt; - if (opt_len <= 0 || opt_len > 128) { + if (opt_len <= 0 || opt_len > sizeof(optval)) { pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n", opt_len); return -EINVAL; } - eq = memchr(opt, '=', opt_len) ?: end; - opt_nlen = eq - opt; - eq++; - opt_vlen = next_opt - eq; /* will be -1 if no value */ + eq = memchr(opt, '=', opt_len); + if (eq) { + opt_nlen = eq - opt; + eq++; + memcpy(optval, eq, next_opt - eq); + optval[next_opt - eq] = '\0'; + } else { + opt_nlen = opt_len; + optval[0] = '\0'; + } - tmp = opt_vlen >= 0 ? opt_vlen : 0; - kdebug("option '%*.*s' val '%*.*s'", - opt_nlen, opt_nlen, opt, tmp, tmp, eq); + kdebug("option '%*.*s' val '%s'", + opt_nlen, opt_nlen, opt, optval); /* see if it's an error number representing a DNS error * that's to be recorded as the result in this key */ if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 && memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) { kdebug("dns error number option"); - if (opt_vlen <= 0) - goto bad_option_value; - ret = kstrtoul(eq, 10, &derrno); + ret = kstrtoul(optval, 10, &derrno); if (ret < 0) goto bad_option_value; -- GitLab From 30a7a7b04f8b4e38b1af9acda2a4dd533e260ed2 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 12 Jul 2018 08:03:43 -0700 Subject: [PATCH 0234/1291] tls: Stricter error checking in zerocopy sendmsg path commit 32da12216e467dea70a09cd7094c30779ce0f9db upstream. In the zerocopy sendmsg() path, there are error checks to revert the zerocopy if we get any error code. syzkaller has discovered that tls_push_record can return -ECONNRESET, which is fatal, and happens after the point at which it is safe to revert the iter, as we've already passed the memory to do_tcp_sendpages. Previously this code could return -ENOMEM and we would want to revert the iter, but AFAIK this no longer returns ENOMEM after a447da7d004 ("tls: fix waitall behavior in tls_sw_recvmsg"), so we fail for all error codes. Reported-by: syzbot+c226690f7b3126c5ee04@syzkaller.appspotmail.com Reported-by: syzbot+709f2810a6a05f11d4d3@syzkaller.appspotmail.com Signed-off-by: Dave Watson Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3c86614462f6..8ee4e667a414 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -449,7 +449,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) ret = tls_push_record(sk, msg->msg_flags, record_type); if (!ret) continue; - if (ret == -EAGAIN) + if (ret < 0) goto send_end; copied -= try_to_copy; -- GitLab From 00235ab80007696f1274eca9c0529b8f7c85def0 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 13 Jul 2018 16:58:59 -0700 Subject: [PATCH 0235/1291] autofs: fix slab out of bounds read in getname_kernel() commit 02f51d45937f7bc7f4dee21e9f85b2d5eac37104 upstream. The autofs subsystem does not check that the "path" parameter is present for all cases where it is required when it is passed in via the "param" struct. In particular it isn't checked for the AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ioctl command. To solve it, modify validate_dev_ioctl(function to check that a path has been provided for ioctl commands that require it. Link: http://lkml.kernel.org/r/153060031527.26631.18306637892746301555.stgit@pluto.themaw.net Signed-off-by: Tomas Bortoli Signed-off-by: Ian Kent Reported-by: syzbot+60c837b428dc84e83a93@syzkaller.appspotmail.com Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/autofs4/dev-ioctl.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index b7c816f39404..6dd63981787a 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -148,6 +148,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) cmd); goto out; } + } else { + unsigned int inr = _IOC_NR(cmd); + + if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || + inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || + inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { + err = -EINVAL; + goto out; + } } err = 0; @@ -284,7 +293,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp, dev_t devid; int err, fd; - /* param->path has already been checked */ + /* param->path has been checked in validate_dev_ioctl() */ + if (!param->openmount.devid) return -EINVAL; @@ -446,10 +456,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; @@ -534,10 +541,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; -- GitLab From b124e97f3ef528e81f28c783b6ddce5ee0faf119 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Jul 2018 12:00:44 -0400 Subject: [PATCH 0236/1291] nsh: set mac len based on inner packet commit bab2c80e5a6c855657482eac9e97f5f3eedb509a upstream. When pulling the NSH header in nsh_gso_segment, set the mac length based on the encapsulated packet type. skb_reset_mac_len computes an offset to the network header, which here still points to the outer packet: > skb_reset_network_header(skb); > [...] > __skb_pull(skb, nsh_len); > skb_reset_mac_header(skb); // now mac hdr starts nsh_len == 8B after net hdr > skb_reset_mac_len(skb); // mac len = net hdr - mac hdr == (u16) -8 == 65528 > [..] > skb_mac_gso_segment(skb, ..) Link: http://lkml.kernel.org/r/CAF=yD-KeAcTSOn4AxirAxL8m7QAS8GBBe1w09eziYwvPbbUeYA@mail.gmail.com Reported-by: syzbot+7b9ed9872dab8c32305d@syzkaller.appspotmail.com Fixes: c411ed854584 ("nsh: add GSO support") Signed-off-by: Willem de Bruijn Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nsh/nsh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 6df6f58a8103..5647905c88d6 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -42,7 +42,7 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, __skb_pull(skb, nsh_len); skb_reset_mac_header(skb); - skb_reset_mac_len(skb); + skb->mac_len = proto == htons(ETH_P_TEB) ? ETH_HLEN : 0; skb->protocol = proto; features &= NETIF_F_SG; -- GitLab From 28c74ff85efd192aeca9005499ca50c24d795f61 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 9 Jul 2018 13:43:38 +0200 Subject: [PATCH 0237/1291] netfilter: ipv6: nf_defrag: drop skb dst before queueing commit 84379c9afe011020e797e3f50a662b08a6355dcf upstream. Eric Dumazet reports: Here is a reproducer of an annoying bug detected by syzkaller on our production kernel [..] ./b78305423 enable_conntrack Then : sleep 60 dmesg | tail -10 [ 171.599093] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 181.631024] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 191.687076] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 201.703037] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 211.711072] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 221.959070] unregister_netdevice: waiting for lo to become free. Usage count = 2 Reproducer sends ipv6 fragment that hits nfct defrag via LOCAL_OUT hook. skb gets queued until frag timer expiry -- 1 minute. Normally nf_conntrack_reasm gets called during prerouting, so skb has no dst yet which might explain why this wasn't spotted earlier. Reported-by: Eric Dumazet Reported-by: John Sperbeck Signed-off-by: Florian Westphal Tested-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 64ec23388450..722a9db8c6a7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -618,6 +618,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fq->q.meat == fq->q.len && nf_ct_frag6_reasm(fq, skb, dev)) ret = 0; + else + skb_dst_drop(skb); out_unlock: spin_unlock_bh(&fq->q.lock); -- GitLab From 1bbe05e27af1d67b0c9c113d1f62373e933205ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 18 Jun 2018 15:46:58 +0200 Subject: [PATCH 0238/1291] bdi: Fix another oops in wb_workfn() commit 3ee7e8697d5860b173132606d80a9cd35e7113ee upstream. syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was WB_shutting_down after wb->bdi->dev became NULL. This indicates that unregister_bdi() failed to call wb_shutdown() on one of wb objects. The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus drops bdi's reference to wb structures before going through the list of wbs again and calling wb_shutdown() on each of them. This way the loop iterating through all wbs can easily miss a wb if that wb has already passed through cgwb_remove_from_bdi_list() called from wb_shutdown() from cgwb_release_workfn() and as a result fully shutdown bdi although wb_workfn() for this wb structure is still running. In fact there are also other ways cgwb_bdi_unregister() can race with cgwb_release_workfn() leading e.g. to use-after-free issues: CPU1 CPU2 cgwb_bdi_unregister() cgwb_kill(*slot); cgwb_release() queue_work(cgwb_release_wq, &wb->release_work); cgwb_release_workfn() wb = list_first_entry(&bdi->wb_list, ...) spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); ... kfree_rcu(wb, rcu); wb_shutdown(wb); -> oops use-after-free We solve these issues by synchronizing writeback structure shutdown from cgwb_bdi_unregister() with cgwb_release_workfn() using a new mutex. That way we also no longer need synchronization using WB_shutting_down as the mutex provides it for CONFIG_CGROUP_WRITEBACK case and without CONFIG_CGROUP_WRITEBACK wb_shutdown() can be called only once from bdi_unregister(). Reported-by: syzbot Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/backing-dev-defs.h | 2 +- mm/backing-dev.c | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index eac387a3bfef..3c1beffc861a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -22,7 +22,6 @@ struct dentry; */ enum wb_state { WB_registered, /* bdi_register() was done */ - WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ }; @@ -165,6 +164,7 @@ struct backing_dev_info { #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ + struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6774e0369ebe..9386c98dac12 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -356,15 +356,8 @@ static void wb_shutdown(struct bdi_writeback *wb) spin_lock_bh(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_bh(&wb->work_lock); - /* - * Wait for wb shutdown to finish if someone else is just - * running wb_shutdown(). Otherwise we could proceed to wb / - * bdi destruction before wb_shutdown() is finished. - */ - wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); return; } - set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); cgwb_remove_from_bdi_list(wb); @@ -376,12 +369,6 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); - /* - * Make sure bit gets cleared after shutdown is finished. Matches with - * the barrier provided by test_and_clear_bit() above. - */ - smp_wmb(); - clear_and_wake_up_bit(WB_shutting_down, &wb->state); } static void wb_exit(struct bdi_writeback *wb) @@ -505,10 +492,12 @@ static void cgwb_release_workfn(struct work_struct *work) struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); + mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); css_put(wb->memcg_css); css_put(wb->blkcg_css); + mutex_unlock(&wb->bdi->cgwb_release_mutex); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -694,6 +683,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; + mutex_init(&bdi->cgwb_release_mutex); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { @@ -714,7 +704,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); + spin_unlock_irq(&cgwb_lock); + mutex_lock(&bdi->cgwb_release_mutex); + spin_lock_irq(&cgwb_lock); while (!list_empty(&bdi->wb_list)) { wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); @@ -723,6 +716,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); } spin_unlock_irq(&cgwb_lock); + mutex_unlock(&bdi->cgwb_release_mutex); } /** -- GitLab From a4b57440d971f4ee139a923a0b953f91c3769d70 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Thu, 14 Jun 2018 11:52:34 -0700 Subject: [PATCH 0239/1291] rds: avoid unenecessary cong_update in loop transport commit f1693c63ab133d16994cc50f773982b5905af264 upstream. Loop transport which is self loopback, remote port congestion update isn't relevant. Infact the xmit path already ignores it. Receive path needs to do the same. Reported-by: syzbot+4c20b3866171ce8441d2@syzkaller.appspotmail.com Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/loop.c | 1 + net/rds/rds.h | 5 +++++ net/rds/recv.c | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/net/rds/loop.c b/net/rds/loop.c index f2bf78de5688..dac6218a460e 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -193,4 +193,5 @@ struct rds_transport rds_loop_transport = { .inc_copy_to_user = rds_message_inc_copy_to_user, .inc_free = rds_loop_inc_free, .t_name = "loopback", + .t_type = RDS_TRANS_LOOP, }; diff --git a/net/rds/rds.h b/net/rds/rds.h index d09f6c1facb4..f685d8b514e5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -454,6 +454,11 @@ struct rds_notifier { int n_status; }; +/* Available as part of RDS core, so doesn't need to participate + * in get_preferred transport etc + */ +#define RDS_TRANS_LOOP 3 + /** * struct rds_transport - transport specific behavioural hooks * diff --git a/net/rds/recv.c b/net/rds/recv.c index 555f07ccf0dc..c27cceae52e1 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -103,6 +103,11 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, rds_stats_add(s_recv_bytes_added_to_socket, delta); else rds_stats_add(s_recv_bytes_removed_from_socket, -delta); + + /* loop transport doesn't send/recv congestion updates */ + if (rs->rs_transport->t_type == RDS_TRANS_LOOP) + return; + now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d " -- GitLab From 115df2a7c5ba248360e4f92d78102c34f45977ee Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 18 Jul 2018 18:57:27 +0900 Subject: [PATCH 0240/1291] net/nfc: Avoid stalls when nfc_alloc_send_skb() returned NULL. commit 3bc53be9db21040b5d2de4d455f023c8c494aa68 upstream. syzbot is reporting stalls at nfc_llcp_send_ui_frame() [1]. This is because nfc_llcp_send_ui_frame() is retrying the loop without any delay when nonblocking nfc_alloc_send_skb() returned NULL. Since there is no need to use MSG_DONTWAIT if we retry until sock_alloc_send_pskb() succeeds, let's use blocking call. Also, in case an unexpected error occurred, let's break the loop if blocking nfc_alloc_send_skb() failed. [1] https://syzkaller.appspot.com/bug?id=4a131cc571c3733e0eff6bc673f4e36ae48f19c6 Signed-off-by: Tetsuo Handa Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_commands.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 2ceefa183cee..6a196e438b6c 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -752,11 +752,14 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); - pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT, + pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0, frag_len + LLCP_HEADER_SIZE, &err); if (pdu == NULL) { - pr_err("Could not allocate PDU\n"); - continue; + pr_err("Could not allocate PDU (error=%d)\n", err); + len -= remaining_len; + if (len == 0) + len = err; + break; } pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); -- GitLab From ed812b882599c0ec27a9f58831fc6bfb4efdac80 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 20 Jul 2018 10:52:51 +0100 Subject: [PATCH 0241/1291] KVM: arm64: Store vcpu on the stack during __guest_enter() Commit 32b03d1059667a39e089c45ee38ec9c16332430f upstream. KVM uses tpidr_el2 as its private vcpu register, which makes sense for non-vhe world switch as only KVM can access this register. This means vhe Linux has to use tpidr_el1, which KVM has to save/restore as part of the host context. If the SDEI handler code runs behind KVMs back, it mustn't access any per-cpu variables. To allow this on systems with vhe we need to make the host use tpidr_el2, saving KVM from save/restoring it. __guest_enter() stores the host_ctxt on the stack, do the same with the vcpu. Signed-off-by: James Morse Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/entry.S | 10 +++++++--- arch/arm64/kvm/hyp/hyp-entry.S | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 9c45c6af1f58..fe4678f20a85 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -62,8 +62,8 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1 - // Store the host_ctxt for use at exit time - str x1, [sp, #-16]! + // Store host_ctxt and vcpu for use at exit time + stp x1, x0, [sp, #-16]! add x18, x0, #VCPU_CONTEXT @@ -159,6 +159,10 @@ abort_guest_exit_end: ENDPROC(__guest_exit) ENTRY(__fpsimd_guest_restore) + // x0: esr + // x1: vcpu + // x2-x29,lr: vcpu regs + // vcpu x0-x1 on the stack stp x2, x3, [sp, #-16]! stp x4, lr, [sp, #-16]! @@ -173,7 +177,7 @@ alternative_else alternative_endif isb - mrs x3, tpidr_el2 + mov x3, x1 ldr x0, [x3, #VCPU_HOST_CONTEXT] kern_hyp_va x0 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f49b53331d28..fc1613955090 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -120,6 +120,7 @@ el1_trap: /* * x0: ESR_EC */ + ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter /* * We trap the first access to the FP/SIMD to save the host context @@ -132,19 +133,18 @@ alternative_if_not ARM64_HAS_NO_FPSIMD b.eq __fpsimd_guest_restore alternative_else_nop_endif - mrs x1, tpidr_el2 mov x0, #ARM_EXCEPTION_TRAP b __guest_exit el1_irq: stp x0, x1, [sp, #-16]! - mrs x1, tpidr_el2 + ldr x1, [sp, #16 + 8] mov x0, #ARM_EXCEPTION_IRQ b __guest_exit el1_error: stp x0, x1, [sp, #-16]! - mrs x1, tpidr_el2 + ldr x1, [sp, #16 + 8] mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit -- GitLab From 8ad56472d67cf8ce638876558d36f3c83975294a Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 20 Jul 2018 10:52:52 +0100 Subject: [PATCH 0242/1291] KVM: arm/arm64: Convert kvm_host_cpu_state to a static per-cpu allocation Commit 36989e7fd386a9a5822c48691473863f8fbb404d upstream. kvm_host_cpu_state is a per-cpu allocation made from kvm_arch_init() used to store the host EL1 registers when KVM switches to a guest. Make it easier for ASM to generate pointers into this per-cpu memory by making it a static allocation. Signed-off-by: James Morse Acked-by: Christoffer Dall Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9bee849db682..4aede98b92eb 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -51,8 +51,8 @@ __asm__(".arch_extension virt"); #endif +DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static kvm_cpu_context_t __percpu *kvm_host_cpu_state; /* Per-CPU variable containing the currently running vcpu. */ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); @@ -351,7 +351,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } vcpu->cpu = cpu; - vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); @@ -1259,19 +1259,8 @@ static inline void hyp_cpu_pm_exit(void) } #endif -static void teardown_common_resources(void) -{ - free_percpu(kvm_host_cpu_state); -} - static int init_common_resources(void) { - kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); - if (!kvm_host_cpu_state) { - kvm_err("Cannot allocate host CPU state\n"); - return -ENOMEM; - } - /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); @@ -1413,7 +1402,7 @@ static int init_hyp_mode(void) for_each_possible_cpu(cpu) { kvm_cpu_context_t *cpu_ctxt; - cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); if (err) { @@ -1497,7 +1486,6 @@ int kvm_arch_init(void *opaque) if (!in_hyp_mode) teardown_hyp_mode(); out_err: - teardown_common_resources(); return err; } -- GitLab From 6256b86e851942f4ec145d36a0c8f3673390b9d9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 20 Jul 2018 10:52:53 +0100 Subject: [PATCH 0243/1291] KVM: arm64: Change hyp_panic()s dependency on tpidr_el2 Commit c97e166e54b662717d20ec2e36761758d2b6a7c2 upstream. Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and on VHE they can be the same register. KVM calls hyp_panic() when anything unexpected happens. This may occur while a guest owns the EL1 registers. KVM stashes the vcpu pointer in tpidr_el2, which it uses to find the host context in order to restore the host EL1 registers before parachuting into the host's panic(). The host context is a struct kvm_cpu_context allocated in the per-cpu area, and mapped to hyp. Given the per-cpu offset for this CPU, this is easy to find. Change hyp_panic() to take a pointer to the struct kvm_cpu_context. Wrap these calls with an asm function that retrieves the struct kvm_cpu_context from the host's per-cpu area. Copy the per-cpu offset from the hosts tpidr_el1 into tpidr_el2 during kvm init. (Later patches will make this unnecessary for VHE hosts) We print out the vcpu pointer as part of the panic message. Add a back reference to the 'running vcpu' in the host cpu context to preserve this. Signed-off-by: James Morse Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++++ arch/arm64/kvm/hyp/s2-setup.c | 3 +++ arch/arm64/kvm/hyp/switch.c | 25 +++++++++++++------------ 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8abec9f7f430..7b373e56750f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -194,6 +194,8 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; u32 copro[NR_COPRO_REGS]; }; + + struct kvm_vcpu *__hyp_running_vcpu; }; typedef struct kvm_cpu_context kvm_cpu_context_t; diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index fc1613955090..f36464bd57c5 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -179,6 +179,18 @@ ENTRY(__hyp_do_panic) eret ENDPROC(__hyp_do_panic) +ENTRY(__hyp_panic) + /* + * '=kvm_host_cpu_state' is a host VA from the constant pool, it may + * not be accessible by this address from EL2, hyp_panic() converts + * it with kern_hyp_va() before use. + */ + ldr x0, =kvm_host_cpu_state + mrs x1, tpidr_el2 + add x0, x0, x1 + b hyp_panic +ENDPROC(__hyp_panic) + .macro invalid_vector label, target = __hyp_panic .align 2 \label: diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index a81f5e10fc8c..7fb88274eba1 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void) write_sysreg(val, vtcr_el2); + /* copy tpidr_el1 into tpidr_el2 for use by HYP */ + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); + return parange; } diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index e08ae6b6b63e..8875bca6868a 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -289,9 +289,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) u64 exit_code; vcpu = kern_hyp_va(vcpu); - write_sysreg(vcpu, tpidr_el2); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; __sysreg_save_host_state(host_ctxt); @@ -406,7 +406,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) +static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, + struct kvm_vcpu *vcpu) { unsigned long str_va; @@ -420,35 +421,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) __hyp_do_panic(str_va, spsr, elr, read_sysreg(esr_el2), read_sysreg_el2(far), - read_sysreg(hpfar_el2), par, - (void *)read_sysreg(tpidr_el2)); + read_sysreg(hpfar_el2), par, vcpu); } -static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par) +static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, + struct kvm_vcpu *vcpu) { panic(__hyp_panic_string, spsr, elr, read_sysreg_el2(esr), read_sysreg_el2(far), - read_sysreg(hpfar_el2), par, - (void *)read_sysreg(tpidr_el2)); + read_sysreg(hpfar_el2), par, vcpu); } static hyp_alternate_select(__hyp_call_panic, __hyp_call_panic_nvhe, __hyp_call_panic_vhe, ARM64_HAS_VIRT_HOST_EXTN); -void __hyp_text __noreturn __hyp_panic(void) +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt) { + struct kvm_vcpu *vcpu = NULL; + u64 spsr = read_sysreg_el2(spsr); u64 elr = read_sysreg_el2(elr); u64 par = read_sysreg(par_el1); if (read_sysreg(vttbr_el2)) { - struct kvm_vcpu *vcpu; struct kvm_cpu_context *host_ctxt; - vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = kern_hyp_va(__host_ctxt); + vcpu = host_ctxt->__hyp_running_vcpu; __timer_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); @@ -456,7 +457,7 @@ void __hyp_text __noreturn __hyp_panic(void) } /* Call panic for real */ - __hyp_call_panic()(spsr, elr, par); + __hyp_call_panic()(spsr, elr, par, vcpu); unreachable(); } -- GitLab From 0dac9f10d9525f9617eaf383e1629ae976e2f170 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 20 Jul 2018 10:52:54 +0100 Subject: [PATCH 0244/1291] arm64: alternatives: use tpidr_el2 on VHE hosts Commit 6d99b68933fbcf51f84fcbba49246ce1209ec193 upstream. Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1 on VHE hosts, and allows future code to blindly access per-cpu variables without triggering world-switch. Signed-off-by: James Morse Reviewed-by: Christoffer Dall Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/alternative.h | 2 ++ arch/arm64/include/asm/assembler.h | 8 ++++++++ arch/arm64/include/asm/percpu.h | 11 +++++++++-- arch/arm64/kernel/alternative.c | 9 +++++---- arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ arch/arm64/mm/proc.S | 8 ++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4a85c6952a22..669028172fd6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -12,6 +12,8 @@ #include #include +extern int alternatives_applied; + struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 25b2a4161c7a..66aea4aa455d 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -260,7 +260,11 @@ lr .req x30 // link register #else adr_l \dst, \sym #endif +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif add \dst, \dst, \tmp .endm @@ -271,7 +275,11 @@ lr .req x30 // link register */ .macro ldr_this_cpu dst, sym, tmp adr_l \dst, \sym +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif ldr \dst, [\dst, \tmp] .endm diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 3bd498e4de4c..43393208229e 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,11 +16,15 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include #include static inline void set_my_cpu_offset(unsigned long off) { - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); + asm volatile(ALTERNATIVE("msr tpidr_el1, %0", + "msr tpidr_el2, %0", + ARM64_HAS_VIRT_HOST_EXTN) + :: "r" (off) : "memory"); } static inline unsigned long __my_cpu_offset(void) @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void) * We want to allow caching the value, so avoid using volatile and * instead use a fake stack read to hazard against barrier(). */ - asm("mrs %0, tpidr_el1" : "=r" (off) : + asm(ALTERNATIVE("mrs %0, tpidr_el1", + "mrs %0, tpidr_el2", + ARM64_HAS_VIRT_HOST_EXTN) + : "=r" (off) : "Q" (*(const unsigned long *)current_stack_pointer)); return off; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 6dd0a3a3e5c9..414288a558c8 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -32,6 +32,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) +int alternatives_applied; + struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -143,7 +145,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) */ static int __apply_alternatives_multi_stop(void *unused) { - static int patched = 0; struct alt_region region = { .begin = (struct alt_instr *)__alt_instructions, .end = (struct alt_instr *)__alt_instructions_end, @@ -151,14 +152,14 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(patched)) + while (!READ_ONCE(alternatives_applied)) cpu_relax(); isb(); } else { - BUG_ON(patched); + BUG_ON(alternatives_applied); __apply_alternatives(®ion, true); /* Barriers provided by the cache flushing */ - WRITE_ONCE(patched, 1); + WRITE_ONCE(alternatives_applied, 1); } return 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 718822ab6e4b..376cf12edf0c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -880,6 +880,22 @@ static int __init parse_kpti(char *str) early_param("kpti", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +static int cpu_copy_el2regs(void *__unused) +{ + /* + * Copy register values that aren't redirected by hardware. + * + * Before code patching, we only set tpidr_el1, all CPUs need to copy + * this value to tpidr_el2 before we patch the code. Once we've done + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to + * do anything here. + */ + if (!alternatives_applied) + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); + + return 0; +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -949,6 +965,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_VIRT_HOST_EXTN, .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, + .enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index bf0821b7b1ab..10c835f13f62 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend) mrs x8, mdscr_el1 mrs x9, oslsr_el1 mrs x10, sctlr_el1 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x11, tpidr_el1 +alternative_else + mrs x11, tpidr_el2 +alternative_endif mrs x12, sp_el0 stp x2, x3, [x0] stp x4, xzr, [x0, #16] @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume) msr mdscr_el1, x10 msr sctlr_el1, x12 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN msr tpidr_el1, x13 +alternative_else + msr tpidr_el2, x13 +alternative_endif msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1 -- GitLab From 286950e0831b9bf7180c09b2dd06ec88922fcc12 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 20 Jul 2018 10:52:55 +0100 Subject: [PATCH 0245/1291] KVM: arm64: Stop save/restoring host tpidr_el1 on VHE Commit 1f742679c33bc083722cb0b442a95d458c491b56 upstream. Now that a VHE host uses tpidr_el2 for the cpu offset we no longer need KVM to save/restore tpidr_el1. Move this from the 'common' code into the non-vhe code. While we're at it, on VHE we don't need to save the ELR or SPSR as kernel_entry in entry.S will have pushed these onto the kernel stack, and will restore them from there. Move these to the non-vhe code as we need them to get back to the host. Finally remove the always-copy-tpidr we hid in the stage2 setup code, cpufeature's enable callback will do this for VHE, we only need KVM to do it for non-vhe. Add the copy into kvm-init instead. Signed-off-by: James Morse Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp-init.S | 4 ++++ arch/arm64/kvm/hyp/s2-setup.c | 3 --- arch/arm64/kvm/hyp/sysreg-sr.c | 16 ++++++++-------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 870828c364c5..dea20651a5f1 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -122,6 +122,10 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE) kern_hyp_va x2 msr vbar_el2, x2 + /* copy tpidr_el1 into tpidr_el2 for use by HYP */ + mrs x1, tpidr_el1 + msr tpidr_el2, x1 + /* Hello, World! */ eret ENDPROC(__kvm_hyp_init) diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index 7fb88274eba1..a81f5e10fc8c 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -84,8 +84,5 @@ u32 __hyp_text __init_stage2_translation(void) write_sysreg(val, vtcr_el2); - /* copy tpidr_el1 into tpidr_el2 for use by HYP */ - write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); - return parange; } diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 934137647837..c54cc2afb92b 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } /* * Non-VHE: Both host and guest must save everything. * - * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc, - * pstate, and guest must save everything. + * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0, + * and guest must save everything. */ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) @@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); - ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); } static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) @@ -62,10 +59,13 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); + ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); + ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); } static hyp_alternate_select(__sysreg_call_save_host_state, @@ -89,11 +89,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); - write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); - write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); } static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) @@ -115,10 +112,13 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); + write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); + write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); } static hyp_alternate_select(__sysreg_call_restore_host_state, -- GitLab From e77175fafa7dd941873dc248ab0f20aa56fe3431 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:52:56 +0100 Subject: [PATCH 0246/1291] arm64: alternatives: Add dynamic patching feature Commit dea5e2a4c5bcf196f879a66cebdcca07793e8ba4 upstream. We've so far relied on a patching infrastructure that only gave us a single alternative, without any way to provide a range of potential replacement instructions. For a single feature, this is an all or nothing thing. It would be interesting to have a more flexible grained way of patching the kernel though, where we could dynamically tune the code that gets injected. In order to achive this, let's introduce a new form of dynamic patching, assiciating a callback to a patching site. This callback gets source and target locations of the patching request, as well as the number of instructions to be patched. Dynamic patching is declared with the new ALTERNATIVE_CB and alternative_cb directives: asm volatile(ALTERNATIVE_CB("mov %0, #0\n", callback) : "r" (v)); or alternative_cb callback mov x0, #0 alternative_cb_end where callback is the C function computing the alternative. Reviewed-by: Christoffer Dall Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/alternative.h | 41 +++++++++++++++++++++++--- arch/arm64/kernel/alternative.c | 43 +++++++++++++++++++++------- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 669028172fd6..a91933b1e2e6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -5,6 +5,8 @@ #include #include +#define ARM64_CB_PATCH ARM64_NCAPS + #ifndef __ASSEMBLY__ #include @@ -22,12 +24,19 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; +typedef void (*alternative_cb_t)(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(feature,cb) \ " .word 661b - .\n" /* label */ \ + " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ + " .else\n" \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length); * but most assemblers die if insn1 or insn2 have a .inst. This should * be fixed in a binutils release posterior to 2.25.51.0.2 (anything * containing commit 4e4d08cf7399b606 or c1baaddf8861). + * + * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(feature,cb) \ ".popsection\n" \ + " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ + ".else\n\t" \ + "663:\n\t" \ + "664:\n\t" \ + ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) +#define ALTERNATIVE_CB(oldinstr, cb) \ + __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) #else #include @@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length); 661: .endm +.macro alternative_cb cb + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + .popsection +661: +.endm + /* * Provide the other half of the alternative code sequence. */ @@ -157,6 +183,13 @@ void apply_alternatives(void *start, size_t length); .org . - (662b-661b) + (664b-663b) .endm +/* + * Callback-based alternative epilogue + */ +.macro alternative_cb_end +662: +.endm + /* * Provides a trivial alternative or default sequence consisting solely * of NOPs. The number of NOPs is chosen automatically to match the diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 414288a558c8..5c4bce4ac381 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } +static void patch_alternative(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + __le32 *replptr; + int i; + + replptr = ALT_REPL_PTR(alt); + for (i = 0; i < nr_inst; i++) { + u32 insn; + + insn = get_alt_insn(alt, origptr + i, replptr + i); + updptr[i] = cpu_to_le32(insn); + } +} + static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - __le32 *origptr, *replptr, *updptr; + __le32 *origptr, *updptr; + alternative_cb_t alt_cb; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i, nr_inst; + int nr_inst; - if (!cpus_have_cap(alt->cpufeature)) + /* Use ARM64_CB_PATCH as an unconditional patch */ + if (alt->cpufeature < ARM64_CB_PATCH && + !cpus_have_cap(alt->cpufeature)) continue; - BUG_ON(alt->alt_len != alt->orig_len); + if (alt->cpufeature == ARM64_CB_PATCH) + BUG_ON(alt->alt_len != 0); + else + BUG_ON(alt->alt_len != alt->orig_len); pr_info_once("patching kernel code\n"); origptr = ALT_ORIG_PTR(alt); - replptr = ALT_REPL_PTR(alt); updptr = use_linear_alias ? lm_alias(origptr) : origptr; - nr_inst = alt->alt_len / sizeof(insn); + nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - for (i = 0; i < nr_inst; i++) { - insn = get_alt_insn(alt, origptr + i, replptr + i); - updptr[i] = cpu_to_le32(insn); - } + if (alt->cpufeature < ARM64_CB_PATCH) + alt_cb = patch_alternative; + else + alt_cb = ALT_REPL_PTR(alt); + + alt_cb(alt, origptr, updptr, nr_inst); flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + nr_inst)); -- GitLab From dca7815605aff032d0b7f9c4f1d98af0e529cdee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:52:57 +0100 Subject: [PATCH 0247/1291] KVM: arm/arm64: Do not use kern_hyp_va() with kvm_vgic_global_state Commit 44a497abd621a71c645f06d3d545ae2f46448830 upstream. kvm_vgic_global_state is part of the read-only section, and is usually accessed using a PC-relative address generation (adrp + add). It is thus useless to use kern_hyp_va() on it, and actively problematic if kern_hyp_va() becomes non-idempotent. On the other hand, there is no way that the compiler is going to guarantee that such access is always PC relative. So let's bite the bullet and provide our own accessor. Acked-by: Catalin Marinas Reviewed-by: James Morse Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 7 +++++++ arch/arm64/include/asm/kvm_mmu.h | 20 ++++++++++++++++++++ virt/kvm/arm/hyp/vgic-v2-sr.c | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 08cd720eae01..4d6b17e23d31 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -28,6 +28,13 @@ */ #define kern_hyp_va(kva) (kva) +/* Contrary to arm64, there is no need to generate a PC-relative address */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr = &(s); \ + addr; \ + }) + /* * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index fe55b516f018..c84e95e0e522 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -130,6 +130,26 @@ static inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +/* + * Obtain the PC-relative address of a kernel symbol + * s: symbol + * + * The goal of this macro is to return a symbol's address based on a + * PC-relative computation, as opposed to a loading the VA from a + * constant pool or something similar. This works well for HYP, as an + * absolute VA is guaranteed to be wrong. Only use this if trying to + * obtain the address of a symbol (i.e. not something you obtained by + * following a pointer). + */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr; \ + asm("adrp %0, %1\n" \ + "add %0, %0, :lo12:%1\n" \ + : "=r" (addr) : "S" (&s)); \ + addr; \ + }) + /* * We currently only support a 40bit IPA. */ diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index d7fd46fe9efb..4b4221b0d4ba 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -139,7 +139,7 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) return -1; rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); + addr = kern_hyp_va(hyp_symbol_addr(kvm_vgic_global_state)->vcpu_base_va); addr += fault_ipa - vgic->vgic_cpu_base; if (kvm_vcpu_dabt_iswrite(vcpu)) { -- GitLab From 2cdc2e62a6ac829832c2bf4ccb1098fccc67f82c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 20 Jul 2018 10:52:58 +0100 Subject: [PATCH 0248/1291] KVM: arm64: Avoid storing the vcpu pointer on the stack Commit 4464e210de9e80e38de59df052fe09ea2ff80b1b upstream. We already have the percpu area for the host cpu state, which points to the VCPU, so there's no need to store the VCPU pointer on the stack on every context switch. We can be a little more clever and just use tpidr_el2 for the percpu offset and load the VCPU pointer from the host context. This has the benefit of being able to retrieve the host context even when our stack is corrupted, and it has a potential performance benefit because we trade a store plus a load for an mrs and a load on a round trip to the guest. This does require us to calculate the percpu offset without including the offset from the kernel mapping of the percpu array to the linear mapping of the array (which is what we store in tpidr_el1), because a PC-relative generated address in EL2 is already giving us the hyp alias of the linear mapping of a kernel address. We do this in __cpu_init_hyp_mode() by using kvm_ksym_ref(). The code that accesses ESR_EL2 was previously using an alternative to use the _EL1 accessor on VHE systems, but this was actually unnecessary as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2 accessor does the same thing on both systems. Cc: Ard Biesheuvel Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_asm.h | 15 +++++++++++++++ arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp/entry.S | 6 +----- arch/arm64/kvm/hyp/hyp-entry.S | 28 ++++++++++------------------ arch/arm64/kvm/hyp/switch.c | 5 +---- arch/arm64/kvm/hyp/sysreg-sr.c | 5 +++++ 7 files changed, 48 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a7ef5a051911..1ed5cb9c322c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -33,6 +33,7 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +/* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ void *val = &sym; \ @@ -68,6 +69,20 @@ extern u32 __init_stage2_translation(void); extern void __qcom_hyp_sanitize_btac_predictors(void); +#else /* __ASSEMBLY__ */ + +.macro get_host_ctxt reg, tmp + adr_l \reg, kvm_host_cpu_state + mrs \tmp, tpidr_el2 + add \reg, \reg, \tmp +.endm + +.macro get_vcpu_ptr vcpu, ctxt + get_host_ctxt \ctxt, \vcpu + ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] + kern_hyp_va \vcpu +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7b373e56750f..147389bd87b9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -350,10 +350,15 @@ int kvm_perf_teardown(void); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); +void __kvm_set_tpidr_el2(u64 tpidr_el2); +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { + u64 tpidr_el2; + /* * Call initialization code, and switch to the full blown HYP code. * If the cpucaps haven't been finalized yet, something has gone very @@ -362,6 +367,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); + + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) + - (u64)kvm_ksym_ref(kvm_host_cpu_state); + + kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); } static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index af247d10252f..2a1a04c87e9a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -136,6 +136,7 @@ int main(void) DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fe4678f20a85..a7b3c198d4de 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -62,9 +62,6 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1 - // Store host_ctxt and vcpu for use at exit time - stp x1, x0, [sp, #-16]! - add x18, x0, #VCPU_CONTEXT // Restore guest regs x0-x17 @@ -118,8 +115,7 @@ ENTRY(__guest_exit) // Store the guest regs x19-x29, lr save_callee_saved_regs x1 - // Restore the host_ctxt from the stack - ldr x2, [sp], #16 + get_host_ctxt x2, x3 // Now restore the host regs restore_callee_saved_regs x2 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f36464bd57c5..82fbc368f738 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -57,13 +57,8 @@ ENDPROC(__vhe_hyp_call) el1_sync: // Guest trapped into EL2 stp x0, x1, [sp, #-16]! -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x1, esr_el2 -alternative_else - mrs x1, esr_el1 -alternative_endif - lsr x0, x1, #ESR_ELx_EC_SHIFT - + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap @@ -117,10 +112,14 @@ el1_hvc_guest: eret el1_trap: + get_vcpu_ptr x1, x0 + + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT /* * x0: ESR_EC + * x1: vcpu pointer */ - ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter /* * We trap the first access to the FP/SIMD to save the host context @@ -138,13 +137,13 @@ alternative_else_nop_endif el1_irq: stp x0, x1, [sp, #-16]! - ldr x1, [sp, #16 + 8] + get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit el1_error: stp x0, x1, [sp, #-16]! - ldr x1, [sp, #16 + 8] + get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit @@ -180,14 +179,7 @@ ENTRY(__hyp_do_panic) ENDPROC(__hyp_do_panic) ENTRY(__hyp_panic) - /* - * '=kvm_host_cpu_state' is a host VA from the constant pool, it may - * not be accessible by this address from EL2, hyp_panic() converts - * it with kern_hyp_va() before use. - */ - ldr x0, =kvm_host_cpu_state - mrs x1, tpidr_el2 - add x0, x0, x1 + get_host_ctxt x0, x1 b hyp_panic ENDPROC(__hyp_panic) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 8875bca6868a..1f41b0fe59bc 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -437,7 +437,7 @@ static hyp_alternate_select(__hyp_call_panic, __hyp_call_panic_nvhe, __hyp_call_panic_vhe, ARM64_HAS_VIRT_HOST_EXTN); -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt) +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu *vcpu = NULL; @@ -446,9 +446,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt) u64 par = read_sysreg(par_el1); if (read_sysreg(vttbr_el2)) { - struct kvm_cpu_context *host_ctxt; - - host_ctxt = kern_hyp_va(__host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __timer_save_state(vcpu); __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index c54cc2afb92b..e19d89cabf2a 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -183,3 +183,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); } + +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) +{ + asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); +} -- GitLab From 1de2719134b5fe1156d04ef2a31a1b8b2307d3ba Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:52:59 +0100 Subject: [PATCH 0249/1291] arm/arm64: smccc: Add SMCCC-specific return codes commit eff0e9e1078ea7dc1d794dc50e31baef984c46d7 upstream. We've so far used the PSCI return codes for SMCCC because they were extremely similar. But with the new ARM DEN 0070A specification, "NOT_REQUIRED" (-2) is clashing with PSCI's "PSCI_RET_INVALID_PARAMS". Let's bite the bullet and add SMCCC specific return codes. Users can be repainted as and when required. Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- include/linux/arm-smccc.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a031897fca76..c89da86de99f 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -291,5 +291,10 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) +/* Return codes defined in ARM DEN 0070A */ +#define SMCCC_RET_SUCCESS 0 +#define SMCCC_RET_NOT_SUPPORTED -1 +#define SMCCC_RET_NOT_REQUIRED -2 + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ -- GitLab From 5ad09d2abb5a0e4193b81c147efc828e13caf2ca Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:00 +0100 Subject: [PATCH 0250/1291] arm64: Call ARCH_WORKAROUND_2 on transitions between EL0 and EL1 commit 8e2906245f1e3b0d027169d9f2e55ce0548cb96e upstream. In order for the kernel to protect itself, let's call the SSBD mitigation implemented by the higher exception level (either hypervisor or firmware) on each transition between userspace and kernel. We must take the PSCI conduit into account in order to target the right exception level, hence the introduction of a runtime patching callback. Reviewed-by: Mark Rutland Reviewed-by: Julien Grall Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 24 ++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 22 ++++++++++++++++++++++ include/linux/arm-smccc.h | 5 +++++ 3 files changed, 51 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b5a28336c077..44798a700f6c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -228,6 +228,30 @@ static int qcom_enable_link_stack_sanitization(void *data) } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +#ifdef CONFIG_ARM64_SSBD +void __init arm64_update_smccc_conduit(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + u32 insn; + + BUG_ON(nr_inst != 1); + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case PSCI_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); +} +#endif /* CONFIG_ARM64_SSBD */ + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 93958d1341bb..f9473efee9c5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -18,6 +18,7 @@ * along with this program. If not, see . */ +#include #include #include @@ -137,6 +138,18 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm + // This macro corrupts x0-x3. It is the caller's duty + // to save/restore them if required. + .macro apply_ssbd, state +#ifdef CONFIG_ARM64_SSBD + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + mov w1, #\state +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -163,6 +176,13 @@ alternative_else_nop_endif ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. + apply_ssbd 1 + +#ifdef CONFIG_ARM64_SSBD + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] +#endif + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE @@ -301,6 +321,8 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: + apply_ssbd 0 + .endif msr elr_el1, x21 // set up the return data diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c89da86de99f..ca1d2cc2cdfa 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -80,6 +80,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x8000) +#define ARM_SMCCC_ARCH_WORKAROUND_2 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x7fff) + #ifndef __ASSEMBLY__ #include -- GitLab From 1bffd48690119d79468355ebcf992475beefb0b5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:01 +0100 Subject: [PATCH 0251/1291] arm64: Add per-cpu infrastructure to call ARCH_WORKAROUND_2 commit 5cf9ce6e5ea50f805c6188c04ed0daaec7b6887d upstream. In a heterogeneous system, we can end up with both affected and unaffected CPUs. Let's check their status before calling into the firmware. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 2 ++ arch/arm64/kernel/entry.S | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 44798a700f6c..62530ba60497 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -229,6 +229,8 @@ static int qcom_enable_link_stack_sanitization(void *data) #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #ifdef CONFIG_ARM64_SSBD +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + void __init arm64_update_smccc_conduit(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f9473efee9c5..caa77d060af1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -140,8 +140,10 @@ alternative_else_nop_endif // This macro corrupts x0-x3. It is the caller's duty // to save/restore them if required. - .macro apply_ssbd, state + .macro apply_ssbd, state, targ, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD + ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 + cbz \tmp2, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit @@ -176,12 +178,13 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. - apply_ssbd 1 + apply_ssbd 1, 1f, x22, x23 #ifdef CONFIG_ARM64_SSBD ldp x0, x1, [sp, #16 * 0] ldp x2, x3, [sp, #16 * 1] #endif +1: mov x29, xzr // fp pointed to user-space .else @@ -321,8 +324,8 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: - apply_ssbd 0 - + apply_ssbd 0, 5f, x0, x1 +5: .endif msr elr_el1, x21 // set up the return data -- GitLab From 837c87c233c1cbf53c8576c2c1a269dce8d68d65 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:02 +0100 Subject: [PATCH 0252/1291] arm64: Add ARCH_WORKAROUND_2 probing commit a725e3dda1813ed306734823ac4c65ca04e38500 upstream. As for Spectre variant-2, we rely on SMCCC 1.1 to provide the discovery mechanism for detecting the SSBD mitigation. A new capability is also allocated for that purpose, and a config option. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 9 +++++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/kernel/cpu_errata.c | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2d5f7aca156d..1bbb89d37f57 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -849,6 +849,15 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config ARM64_SSBD + bool "Speculative Store Bypass Disable" if EXPERT + default y + help + This enables mitigation of the bypassing of previous stores + by speculative loads. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 2e7b236bc596..76c0d23ca161 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -43,7 +43,8 @@ #define ARM64_UNMAP_KERNEL_AT_EL0 23 #define ARM64_HARDEN_BRANCH_PREDICTOR 24 #define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 +#define ARM64_SSBD 26 -#define ARM64_NCAPS 26 +#define ARM64_NCAPS 27 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 62530ba60497..86cd3b690f35 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -252,6 +252,67 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, *updptr = cpu_to_le32(insn); } + +static void arm64_set_ssbd_mitigation(bool state) +{ + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + +static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, + int scope) +{ + struct arm_smccc_res res; + bool supported = true; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return false; + + /* + * The probe function return value is either negative + * (unsupported or mitigated), positive (unaffected), or zero + * (requires mitigation). We only need to do anything in the + * last case. + */ + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + if ((int)res.a0 != 0) + supported = false; + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + if ((int)res.a0 != 0) + supported = false; + break; + + default: + supported = false; + } + + if (supported) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + + return supported; +} #endif /* CONFIG_ARM64_SSBD */ #define MIDR_RANGE(model, min, max) \ @@ -451,6 +512,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), .enable = enable_smccc_arch_workaround_1, }, +#endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "Speculative Store Bypass Disable", + .def_scope = SCOPE_LOCAL_CPU, + .capability = ARM64_SSBD, + .matches = has_ssbd_mitigation, + }, #endif { } -- GitLab From 45808ab2f9245c2921b6e81025fde46ae6325013 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:03 +0100 Subject: [PATCH 0253/1291] arm64: Add 'ssbd' command-line option commit a43ae4dfe56a01f5b98ba0cb2f784b6a43bafcc6 upstream. On a system where the firmware implements ARCH_WORKAROUND_2, it may be useful to either permanently enable or disable the workaround for cases where the user decides that they'd rather not get a trap overhead, and keep the mitigation permanently on or off instead of switching it on exception entry/exit. In any case, default to the mitigation being enabled. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 17 +++ arch/arm64/include/asm/cpufeature.h | 6 + arch/arm64/kernel/cpu_errata.c | 103 +++++++++++++++--- 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0380a45ecf4b..d6d7669e667f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3997,6 +3997,23 @@ expediting. Set to zero to disable automatic expediting. + ssbd= [ARM64,HW] + Speculative Store Bypass Disable control + + On CPUs that are vulnerable to the Speculative + Store Bypass vulnerability and offer a + firmware based mitigation, this parameter + indicates how the mitigation should be used: + + force-on: Unconditionally enable mitigation for + for both kernel and userspace + force-off: Unconditionally disable mitigation for + for both kernel and userspace + kernel: Always enable mitigation in the + kernel, and offer a prctl interface + to allow userspace to register its + interest in being mitigated too. + stack_guard_gap= [MM] override the default stack gap protection. The value is in page units and it defines how many pages prior diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 428ee1f2468c..0bd5ebfb328e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,6 +262,12 @@ static inline bool system_uses_ttbr0_pan(void) !cpus_have_const_cap(ARM64_HAS_PAN); } +#define ARM64_SSBD_UNKNOWN -1 +#define ARM64_SSBD_FORCE_DISABLE 0 +#define ARM64_SSBD_KERNEL 1 +#define ARM64_SSBD_FORCE_ENABLE 2 +#define ARM64_SSBD_MITIGATED 3 + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 86cd3b690f35..b3df8e5986e0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -231,6 +231,38 @@ static int qcom_enable_link_stack_sanitization(void *data) #ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); +int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; + +static const struct ssbd_options { + const char *str; + int state; +} ssbd_options[] = { + { "force-on", ARM64_SSBD_FORCE_ENABLE, }, + { "force-off", ARM64_SSBD_FORCE_DISABLE, }, + { "kernel", ARM64_SSBD_KERNEL, }, +}; + +static int __init ssbd_cfg(char *buf) +{ + int i; + + if (!buf || !buf[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { + int len = strlen(ssbd_options[i].str); + + if (strncmp(buf, ssbd_options[i].str, len)) + continue; + + ssbd_state = ssbd_options[i].state; + return 0; + } + + return -EINVAL; +} +early_param("ssbd", ssbd_cfg); + void __init arm64_update_smccc_conduit(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) @@ -274,44 +306,83 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, int scope) { struct arm_smccc_res res; - bool supported = true; + bool required = true; + s32 val; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { + ssbd_state = ARM64_SSBD_UNKNOWN; return false; + } - /* - * The probe function return value is either negative - * (unsupported or mitigated), positive (unaffected), or zero - * (requires mitigation). We only need to do anything in the - * last case. - */ switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); - if ((int)res.a0 != 0) - supported = false; break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); - if ((int)res.a0 != 0) - supported = false; break; default: - supported = false; + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + } + + val = (s32)res.a0; + + switch (val) { + case SMCCC_RET_NOT_SUPPORTED: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + + case SMCCC_RET_NOT_REQUIRED: + pr_info_once("%s mitigation not required\n", entry->desc); + ssbd_state = ARM64_SSBD_MITIGATED; + return false; + + case SMCCC_RET_SUCCESS: + required = true; + break; + + case 1: /* Mitigation not required on this CPU */ + required = false; + break; + + default: + WARN_ON(1); + return false; } - if (supported) { - __this_cpu_write(arm64_ssbd_callback_required, 1); + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + arm64_set_ssbd_mitigation(false); + required = false; + break; + + case ARM64_SSBD_KERNEL: + if (required) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); + required = true; + break; + + default: + WARN_ON(1); + break; } - return supported; + return required; } #endif /* CONFIG_ARM64_SSBD */ -- GitLab From 8d6907af4583eec62e4eb7f839da3c33d570556e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:04 +0100 Subject: [PATCH 0254/1291] arm64: ssbd: Add global mitigation state accessor commit c32e1736ca03904c03de0e4459a673be194f56fd upstream. We're about to need the mitigation state in various parts of the kernel in order to do the right thing for userspace and guests. Let's expose an accessor that will let other subsystems know about the state. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0bd5ebfb328e..a2415ee6b8a3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,16 @@ static inline bool system_uses_ttbr0_pan(void) #define ARM64_SSBD_FORCE_ENABLE 2 #define ARM64_SSBD_MITIGATED 3 +static inline int arm64_get_ssbd_state(void) +{ +#ifdef CONFIG_ARM64_SSBD + extern int ssbd_state; + return ssbd_state; +#else + return ARM64_SSBD_UNKNOWN; +#endif +} + #endif /* __ASSEMBLY__ */ #endif -- GitLab From 02e26bd9ad58197533a7d5fcd62f975891a9e936 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:05 +0100 Subject: [PATCH 0255/1291] arm64: ssbd: Skip apply_ssbd if not using dynamic mitigation commit 986372c4367f46b34a3c0f6918d7fb95cbdf39d6 upstream. In order to avoid checking arm64_ssbd_callback_required on each kernel entry/exit even if no mitigation is required, let's add yet another alternative that by default jumps over the mitigation, and that gets nop'ed out if we're doing dynamic mitigation. Think of it as a poor man's static key... Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++ arch/arm64/kernel/entry.S | 3 +++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b3df8e5986e0..012dab9d9a01 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -285,6 +285,20 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, *updptr = cpu_to_le32(insn); } +void __init arm64_enable_wa2_handling(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + BUG_ON(nr_inst != 1); + /* + * Only allow mitigation on EL1 entry/exit and guest + * ARCH_WORKAROUND_2 handling if the SSBD state allows it to + * be flipped. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + static void arm64_set_ssbd_mitigation(bool state) { switch (psci_ops.conduit) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index caa77d060af1..5a585976d14c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,6 +142,9 @@ alternative_else_nop_endif // to save/restore them if required. .macro apply_ssbd, state, targ, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b \targ +alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 -- GitLab From c5c89bb4deb8e1f0eed0968f37dfa936f6b5e4c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:06 +0100 Subject: [PATCH 0256/1291] arm64: ssbd: Restore mitigation status on CPU resume commit 647d0519b53f440a55df163de21c52a8205431cc upstream. On a system where firmware can dynamically change the state of the mitigation, the CPU will always come up with the mitigation enabled, including when coming back from suspend. If the user has requested "no mitigation" via a command line option, let's enforce it by calling into the firmware again to disable it. Similarily, for a resume from hibernate, the mitigation could have been disabled by the boot kernel. Let's ensure that it is set back on in that case. Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/hibernate.c | 11 +++++++++++ arch/arm64/kernel/suspend.c | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a2415ee6b8a3..c5bc80a03515 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -278,6 +278,12 @@ static inline int arm64_get_ssbd_state(void) #endif } +#ifdef CONFIG_ARM64_SSBD +void arm64_set_ssbd_mitigation(bool state); +#else +static inline void arm64_set_ssbd_mitigation(bool state) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 012dab9d9a01..eccdb28b4a39 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -299,7 +299,7 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, *updptr = cpu_to_le32(aarch64_insn_gen_nop()); } -static void arm64_set_ssbd_mitigation(bool state) +void arm64_set_ssbd_mitigation(bool state) { switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 095d3c170f5d..a028cc95afe1 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -313,6 +313,17 @@ int swsusp_arch_suspend(void) sleep_cpu = -EINVAL; __cpu_suspend_exit(); + + /* + * Just in case the boot kernel did turn the SSBD + * mitigation off behind our back, let's set the state + * to what we expect it to be. + */ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_ENABLE: + case ARM64_SSBD_KERNEL: + arm64_set_ssbd_mitigation(true); + } } local_dbg_restore(flags); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 77cd655e6eb7..7a655e60cf4b 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -62,6 +62,14 @@ void notrace __cpu_suspend_exit(void) */ if (hw_breakpoint_restore) hw_breakpoint_restore(cpu); + + /* + * On resume, firmware implementing dynamic mitigation will + * have turned the mitigation on. If the user has forcefully + * disabled it, make sure their wishes are obeyed. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + arm64_set_ssbd_mitigation(false); } /* -- GitLab From e7d02797288f12ca257cdab49d19ccf9b3d2cb12 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:07 +0100 Subject: [PATCH 0257/1291] arm64: ssbd: Introduce thread flag to control userspace mitigation commit 9dd9614f5476687abbff8d4b12cd08ae70d7c2ad upstream. In order to allow userspace to be mitigated on demand, let's introduce a new thread flag that prevents the mitigation from being turned off when exiting to userspace, and doesn't turn it on on entry into the kernel (with the assumption that the mitigation is always enabled in the kernel itself). This will be used by a prctl interface introduced in a later patch. Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index ddded6497a8a..fc786d344e46 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -92,6 +92,7 @@ void arch_setup_new_exec(void); #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ +#define TIF_SSBD 23 /* Wants SSB mitigation */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5a585976d14c..c1ffa95c0ad2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -147,6 +147,8 @@ alternative_cb arm64_enable_wa2_handling alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, \targ + ldr \tmp2, [tsk, #TSK_TI_FLAGS] + tbnz \tmp2, #TIF_SSBD, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit -- GitLab From b769d86ea9d4b2d77387791cc747462b19e8af22 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:08 +0100 Subject: [PATCH 0258/1291] arm64: ssbd: Add prctl interface for per-thread mitigation commit 9cdc0108baa8ef87c76ed834619886a46bd70cbe upstream. If running on a system that performs dynamic SSBD mitigation, allow userspace to request the mitigation for itself. This is implemented as a prctl call, allowing the mitigation to be enabled or disabled at will for this particular thread. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/ssbd.c | 108 +++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 arch/arm64/kernel/ssbd.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index def8d5623fd1..714fe90dbf66 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o ifeq ($(CONFIG_KVM),y) arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c new file mode 100644 index 000000000000..0560738c1d5c --- /dev/null +++ b/arch/arm64/kernel/ssbd.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + */ + +#include +#include +#include +#include + +#include + +/* + * prctl interface for SSBD + */ +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + int state = arm64_get_ssbd_state(); + + /* Unsupported */ + if (state == ARM64_SSBD_UNKNOWN) + return -EINVAL; + + /* Treat the unaffected/mitigated state separately */ + if (state == ARM64_SSBD_MITIGATED) { + switch (ctrl) { + case PR_SPEC_ENABLE: + return -EPERM; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + return 0; + } + } + + /* + * Things are a bit backward here: the arm64 internal API + * *enables the mitigation* when the userspace API *disables + * speculation*. So much fun. + */ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (state == ARM64_SSBD_FORCE_ENABLE || + task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_UNKNOWN: + return -EINVAL; + case ARM64_SSBD_FORCE_ENABLE: + return PR_SPEC_DISABLE; + case ARM64_SSBD_KERNEL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case ARM64_SSBD_FORCE_DISABLE: + return PR_SPEC_ENABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} -- GitLab From 0592871918f0acf369cec5c88de1fa76dbf2c1a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:09 +0100 Subject: [PATCH 0259/1291] arm64: KVM: Add HYP per-cpu accessors commit 85478bab409171de501b719971fd25a3d5d639f9 upstream. As we're going to require to access per-cpu variables at EL2, let's craft the minimum set of accessors required to implement reading a per-cpu variable, relying on tpidr_el2 to contain the per-cpu offset. Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_asm.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 1ed5cb9c322c..b956418f73bd 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -69,14 +69,37 @@ extern u32 __init_stage2_translation(void); extern void __qcom_hyp_sanitize_btac_predictors(void); +/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ +#define __hyp_this_cpu_ptr(sym) \ + ({ \ + void *__ptr = hyp_symbol_addr(sym); \ + __ptr += read_sysreg(tpidr_el2); \ + (typeof(&sym))__ptr; \ + }) + +#define __hyp_this_cpu_read(sym) \ + ({ \ + *__hyp_this_cpu_ptr(sym); \ + }) + #else /* __ASSEMBLY__ */ -.macro get_host_ctxt reg, tmp - adr_l \reg, kvm_host_cpu_state +.macro hyp_adr_this_cpu reg, sym, tmp + adr_l \reg, \sym mrs \tmp, tpidr_el2 add \reg, \reg, \tmp .endm +.macro hyp_ldr_this_cpu reg, sym, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el2 + ldr \reg, [\reg, \tmp] +.endm + +.macro get_host_ctxt reg, tmp + hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp +.endm + .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] -- GitLab From 805357aa65bbc419451fb9556968687c7fd1d2e1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:10 +0100 Subject: [PATCH 0260/1291] arm64: KVM: Add ARCH_WORKAROUND_2 support for guests commit 55e3748e8902ff641e334226bdcb432f9a5d78d3 upstream. In order to offer ARCH_WORKAROUND_2 support to guests, we need a bit of infrastructure. Let's add a flag indicating whether or not the guest uses SSBD mitigation. Depending on the state of this flag, allow KVM to disable ARCH_WORKAROUND_2 before entering the guest, and enable it when exiting it. Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 5 ++++ arch/arm64/include/asm/kvm_asm.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_mmu.h | 24 +++++++++++++++++++ arch/arm64/kvm/hyp/switch.c | 38 +++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 4 ++++ 6 files changed, 77 insertions(+) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 4d6b17e23d31..8a098e65f5f8 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -254,6 +254,11 @@ static inline int kvm_map_vectors(void) return 0; } +static inline int hyp_map_aux_data(void) +{ + return 0; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index b956418f73bd..1a6d02350fc6 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -33,6 +33,9 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 +#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) + /* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 147389bd87b9..3d1774bcf108 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* State of various workarounds, see kvm_asm.h for bit assignment */ + u64 workaround_flags; + /* Guest debug state */ u64 debug_flags; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index c84e95e0e522..e42c1f0ae6cf 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -383,5 +383,29 @@ static inline int kvm_map_vectors(void) } #endif +#ifdef CONFIG_ARM64_SSBD +DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +static inline int hyp_map_aux_data(void) +{ + int cpu, err; + + for_each_possible_cpu(cpu) { + u64 *ptr; + + ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); + err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); + if (err) + return err; + } + return 0; +} +#else +static inline int hyp_map_aux_data(void) +{ + return 0; +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 1f41b0fe59bc..b2f1992c6234 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -281,6 +282,39 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) write_sysreg_el2(*vcpu_pc(vcpu), elr); } +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_const_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; @@ -311,6 +345,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_guest_state(guest_ctxt); __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __set_guest_arch_workaround_state(vcpu); + /* Jump in the fire! */ again: exit_code = __guest_enter(vcpu, host_ctxt); @@ -367,6 +403,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* 0 falls through to be handled out of EL2 */ } + __set_host_arch_workaround_state(vcpu); + if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { u32 midr = read_cpuid_id(); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4aede98b92eb..d5f1d8364571 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1411,6 +1411,10 @@ static int init_hyp_mode(void) } } + err = hyp_map_aux_data(); + if (err) + kvm_err("Cannot map host auxilary data: %d\n", err); + return 0; out_err: -- GitLab From 1b749f8a241639951a9c65e64f51e7a6ac5b6ae9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:11 +0100 Subject: [PATCH 0261/1291] arm64: KVM: Handle guest's ARCH_WORKAROUND_2 requests commit b4f18c063a13dfb33e3a63fe1844823e19c2265e upstream. In order to forward the guest's ARCH_WORKAROUND_2 calls to EL3, add a small(-ish) sequence to handle it at EL2. Special care must be taken to track the state of the guest itself by updating the workaround flags. We also rely on patching to enable calls into the firmware. Note that since we need to execute branches, this always executes after the Spectre-v2 mitigation has been applied. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp/hyp-entry.S | 38 ++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 2a1a04c87e9a..b5e43b01b396 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -131,6 +131,7 @@ int main(void) BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 82fbc368f738..3c283fd8c8f5 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -106,8 +106,44 @@ el1_hvc_guest: */ ldr x1, [sp] // Guest's x0 eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 + cbz w1, wa_epilogue + + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap - mov x0, x1 + +#ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b wa2_end +alternative_cb_end + get_vcpu_ptr x2, x0 + ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] + + // Sanitize the argument and update the guest flags + ldr x1, [sp, #8] // Guest's x1 + clz w1, w1 // Murphy's device: + lsr w1, w1, #5 // w1 = !!w1 without using + eor w1, w1, #1 // the flags... + bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 + str x0, [x2, #VCPU_WORKAROUND_FLAGS] + + /* Check that we actually need to perform the call */ + hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 + cbz x0, wa2_end + + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + smc #0 + + /* Don't leak data from the SMC call */ + mov x3, xzr +wa2_end: + mov x2, xzr + mov x1, xzr +#endif + +wa_epilogue: + mov x0, xzr add sp, sp, #16 eret -- GitLab From 96fd60c8160c16dd16542efe4491f0d096d5c1b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Jul 2018 10:53:12 +0100 Subject: [PATCH 0262/1291] arm64: KVM: Add ARCH_WORKAROUND_2 discovery through ARCH_FEATURES_FUNC_ID commit 5d81f7dc9bca4f4963092433e27b508cbe524a32 upstream. Now that all our infrastructure is in place, let's expose the availability of ARCH_WORKAROUND_2 to guests. We take this opportunity to tidy up a couple of SMCCC constants. Acked-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_host.h | 12 ++++++++++++ arch/arm64/include/asm/kvm_host.h | 23 +++++++++++++++++++++++ arch/arm64/kvm/reset.c | 4 ++++ virt/kvm/arm/psci.c | 18 ++++++++++++++++-- 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8f973e3b7348..65572e14306c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -302,4 +302,16 @@ static inline bool kvm_arm_harden_branch_predictor(void) return false; } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + /* No way to detect it yet, pretend it is not there. */ + return KVM_SSBD_UNKNOWN; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d1774bcf108..b01ad3489bd8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -412,4 +412,27 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_DISABLE: + return KVM_SSBD_FORCE_DISABLE; + case ARM64_SSBD_KERNEL: + return KVM_SSBD_KERNEL; + case ARM64_SSBD_FORCE_ENABLE: + return KVM_SSBD_FORCE_ENABLE; + case ARM64_SSBD_MITIGATED: + return KVM_SSBD_MITIGATED; + case ARM64_SSBD_UNKNOWN: + default: + return KVM_SSBD_UNKNOWN; + } +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3256b9228e75..a74311beda35 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -122,6 +122,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset PMU */ kvm_pmu_vcpu_reset(vcpu); + /* Default workaround setup is enabled (if supported) */ + if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) + vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; + /* Reset timer */ return kvm_timer_vcpu_reset(vcpu); } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index c4762bef13c6..c95ab4c5a475 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -405,7 +405,7 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu) int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { u32 func_id = smccc_get_function(vcpu); - u32 val = PSCI_RET_NOT_SUPPORTED; + u32 val = SMCCC_RET_NOT_SUPPORTED; u32 feature; switch (func_id) { @@ -417,7 +417,21 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) switch(feature) { case ARM_SMCCC_ARCH_WORKAROUND_1: if (kvm_arm_harden_branch_predictor()) - val = 0; + val = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } break; } break; -- GitLab From 779128d80cb01e6434936e13754fc25a1cc30929 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 07:21:15 -0800 Subject: [PATCH 0263/1291] string: drop __must_check from strscpy() and restore strscpy() usages in cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 08a77676f9c5fc69a681ccd2cd8140e65dcb26c7 upstream. e7fd37ba1217 ("cgroup: avoid copying strings longer than the buffers") converted possibly unsafe strncpy() usages in cgroup to strscpy(). However, although the callsites are completely fine with truncated copied, because strscpy() is marked __must_check, it led to the following warnings. kernel/cgroup/cgroup.c: In function ‘cgroup_file_name’: kernel/cgroup/cgroup.c:1400:10: warning: ignoring return value of ‘strscpy’, declared with attribute warn_unused_result [-Wunused-result] strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX); ^ To avoid the warnings, 50034ed49645 ("cgroup: use strlcpy() instead of strscpy() to avoid spurious warning") switched them to strlcpy(). strlcpy() is worse than strlcpy() because it unconditionally runs strlen() on the source string, and the only reason we switched to strlcpy() here was because it was lacking __must_check, which doesn't reflect any material differences between the two function. It's just that someone added __must_check to strscpy() and not to strlcpy(). These basic string copy operations are used in variety of ways, and one of not-so-uncommon use cases is safely handling truncated copies, where the caller naturally doesn't care about the return value. The __must_check doesn't match the actual use cases and forces users to opt for inferior variants which lack __must_check by happenstance or spread ugly (void) casts. Remove __must_check from strscpy() and restore strscpy() usages in cgroup. Signed-off-by: Tejun Heo Suggested-by: Linus Torvalds Cc: Ma Shimiao Cc: Arnd Bergmann Cc: Chris Metcalf [backport only the string.h portion to remove build warnings starting to show up - gregkh] Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/string.h b/include/linux/string.h index cfd83eb2f926..96115bf561b4 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -28,7 +28,7 @@ extern char * strncpy(char *,const char *, __kernel_size_t); size_t strlcpy(char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRSCPY -ssize_t __must_check strscpy(char *, const char *, size_t); +ssize_t strscpy(char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); -- GitLab From ecc160ece609498c946e73710e5c7c54c62b966a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Jul 2018 14:28:52 +0200 Subject: [PATCH 0264/1291] Linux 4.14.57 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index acbb0e3d29c9..a44d6b2adb76 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 56 +SUBLEVEL = 57 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 763ccb4d1980155e1f71290f9fc7950065ec77a6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 3 Jul 2018 15:23:58 +0900 Subject: [PATCH 0265/1291] scsi: sd_zbc: Fix variable type and bogus comment commit f13cff6c25bd8986627365346d123312ee7baa78 upstream. Fix the description of sd_zbc_check_zone_size() to correctly explain that the returned value is a number of device blocks, not bytes. Additionally, the 32 bits "ret" variable used in this function may truncate the 64 bits zone_blocks variable value upon return. To fix this, change "ret" type to s64. Fixes: ccce20fc79 ("sd_zbc: Avoid that resetting a zone fails sporadically") Signed-off-by: Damien Le Moal Cc: Bart Van Assche Cc: stable@kernel.org Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd_zbc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index ea9e1e0ed5b8..f4944dde6c8e 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -430,7 +430,8 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, * Check that all zones of the device are equal. The last zone can however * be smaller. The zone size must also be a power of two number of LBAs. * - * Returns the zone size in bytes upon success or an error code upon failure. + * Returns the zone size in number of blocks upon success or an error code + * upon failure. */ static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) { @@ -440,7 +441,7 @@ static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) unsigned char *rec; unsigned int buf_len; unsigned int list_length; - int ret; + s64 ret; u8 same; /* Get a buffer */ -- GitLab From 3a46a033bfa81d624a649bc11b92dad37d2f8d8b Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 21 Dec 2017 21:10:36 -0500 Subject: [PATCH 0266/1291] KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5020a8e6b54d2ece80b1e7dedb33c79a40ebd47 upstream. Syzbot reports crashes in kvm_irqfd_assign(), caused by use-after-free when kvm_irqfd_assign() and kvm_irqfd_deassign() run in parallel for one specific eventfd. When the assign path hasn't finished but irqfd has been added to kvm->irqfds.items list, another thead may deassign the eventfd and free struct kvm_kernel_irqfd(). The assign path then uses the struct kvm_kernel_irqfd that has been freed by deassign path. To avoid such issue, keep irqfd under kvm->irq_srcu protection after the irqfd has been added to kvm->irqfds.items list, and call synchronize_srcu() in irq_shutdown() to make sure that irqfd has been fully initialized in the assign path. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Signed-off-by: Tianyu Lan Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f2ac53ab8243..58a9b31b0dd5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work) { struct kvm_kernel_irqfd *irqfd = container_of(work, struct kvm_kernel_irqfd, shutdown); + struct kvm *kvm = irqfd->kvm; u64 cnt; + /* Make sure irqfd has been initalized in assign path. */ + synchronize_srcu(&kvm->irq_srcu); + /* * Synchronize with the wait-queue and unhook ourselves to prevent * further events. @@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) idx = srcu_read_lock(&kvm->irq_srcu); irqfd_update(kvm, irqfd); - srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -421,6 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } #endif + srcu_read_unlock(&kvm->irq_srcu, idx); return 0; fail: -- GitLab From b4108288eab85f74e0fbe7ba781e319f365a26e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 9 Jul 2018 16:35:34 +0300 Subject: [PATCH 0267/1291] x86/apm: Don't access __preempt_count with zeroed fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6f6060a5c9cc76fdbc22748264e6aa3779ec2427 upstream. APM_DO_POP_SEGS does not restore fs/gs which were zeroed by APM_DO_ZERO_SEGS. Trying to access __preempt_count with zeroed fs doesn't really work. Move the ibrs call outside the APM_DO_SAVE_SEGS/APM_DO_RESTORE_SEGS invocations so that fs is actually restored before calling preempt_enable(). Fixes the following sort of oopses: [ 0.313581] general protection fault: 0000 [#1] PREEMPT SMP [ 0.313803] Modules linked in: [ 0.314040] CPU: 0 PID: 268 Comm: kapmd Not tainted 4.16.0-rc1-triton-bisect-00090-gdd84441a7971 #19 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 [ 0.316161] EFLAGS: 00210016 CPU: 0 [ 0.316161] EAX: 00000102 EBX: 00000000 ECX: 00000102 EDX: 00000000 [ 0.316161] ESI: 0000530e EDI: dea95f64 EBP: dea95f18 ESP: dea95ef0 [ 0.316161] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 0.316161] CR0: 80050033 CR2: 00000000 CR3: 015d3000 CR4: 000006d0 [ 0.316161] Call Trace: [ 0.316161] ? cpumask_weight.constprop.15+0x20/0x20 [ 0.316161] on_cpu0+0x44/0x70 [ 0.316161] apm+0x54e/0x720 [ 0.316161] ? __switch_to_asm+0x26/0x40 [ 0.316161] ? __schedule+0x17d/0x590 [ 0.316161] kthread+0xc0/0xf0 [ 0.316161] ? proc_apm_show+0x150/0x150 [ 0.316161] ? kthread_create_worker_on_cpu+0x20/0x20 [ 0.316161] ret_from_fork+0x2e/0x38 [ 0.316161] Code: da 8e c2 8e e2 8e ea 57 55 2e ff 1d e0 bb 5d b1 0f 92 c3 5d 5f 07 1f 89 47 0c 90 8d b4 26 00 00 00 00 90 8d b4 26 00 00 00 00 90 <64> ff 0d 84 16 5c b1 74 7f 8b 45 dc 8e e0 8b 45 d8 8e e8 8b 45 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 SS:ESP: 0068:dea95ef0 [ 0.316161] ---[ end trace 656253db2deaa12c ]--- Fixes: dd84441a7971 ("x86/speculation: Use IBRS if available before calling into firmware") Signed-off-by: Ville Syrjälä Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709133534.5963-1-ville.syrjala@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apm.h | 6 ------ arch/x86/kernel/apm_32.c | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index c356098b6fb9..4d4015ddcf26 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,8 +7,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 2a7fd56e67b3..63d3e6a6b5ef 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -240,6 +240,7 @@ #include #include #include +#include #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -614,11 +615,13 @@ static long __apm_bios_call(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); @@ -690,10 +693,12 @@ static long __apm_bios_call_simple(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); -- GitLab From aa49e48232ee4dc78d736939f83057dc12d04366 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 14 Jul 2018 12:58:07 -0700 Subject: [PATCH 0268/1291] x86/events/intel/ds: Fix bts_interrupt_threshold alignment commit 2c991e408df6a407476dbc453d725e1e975479e7 upstream. Markus reported that BTS is sporadically missing the tail of the trace in the perf_event data buffer: [decode error (1): instruction overflow] shown in GDB; and bisected it to the conversion of debug_store to PTI. A little "optimization" crept into alloc_bts_buffer(), which mistakenly placed bts_interrupt_threshold away from the 24-byte record boundary. Intel SDM Vol 3B 17.4.9 says "This address must point to an offset from the BTS buffer base that is a multiple of the BTS record size." Revert "max" from a byte count to a record count, to calculate the bts_interrupt_threshold correctly: which turns out to fix problem seen. Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area") Reported-and-tested-by: Markus T Metzger Signed-off-by: Hugh Dickins Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Dave Hansen Cc: Stephane Eranian Cc: stable@vger.kernel.org # v4.14+ Link: https://lkml.kernel.org/r/alpine.LSU.2.11.1807141248290.1614@eggly.anvils Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10b39d44981c..25386be0d757 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -410,9 +410,11 @@ static int alloc_bts_buffer(int cpu) ds->bts_buffer_base = (unsigned long) cea; ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); - ds->bts_absolute_maximum = ds->bts_buffer_base + max; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + (max / 16) * BTS_RECORD_SIZE; return 0; } -- GitLab From 1450a7c5da03292fe4b022e58a92c9a9b91b27aa Mon Sep 17 00:00:00 2001 From: Dewet Thibaut Date: Mon, 16 Jul 2018 10:49:27 +0200 Subject: [PATCH 0269/1291] x86/MCE: Remove min interval polling limitation commit fbdb328c6bae0a7c78d75734a738b66b86dffc96 upstream. commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min interval limitation when setting the check interval for polled MCEs. However, the logic is that 0 disables polling for corrected MCEs, see Documentation/x86/x86_64/machinecheck. The limitation prevents disabling. Remove this limitation and allow the value 0 to disable polling again. Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes") Signed-off-by: Dewet Thibaut Signed-off-by: Alexander Sverdlin [ Massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 58f887f5e036..98e4e4dc4a3b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2150,9 +2150,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); -- GitLab From 321089a0aa35bcc09f423180847aa15b69dc4f9f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 20 Jul 2018 17:53:42 -0700 Subject: [PATCH 0270/1291] fat: fix memory allocation failure handling of match_strdup() commit 35033ab988c396ad7bce3b6d24060c16a9066db8 upstream. In parse_options(), if match_strdup() failed, parse_options() leaves opts->iocharset in unexpected state (i.e. still pointing the freed string). And this can be the cause of double free. To fix, this initialize opts->iocharset always when freeing. Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: syzbot+90b8e10515ae88228a92@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c7a4dee206b9..3b40937b942a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -696,13 +696,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1117,7 +1125,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1274,8 +1282,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1866,8 +1873,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; -- GitLab From 5d251646ab1588077b26e89dcaa116aba105d097 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Jul 2018 17:26:43 +0200 Subject: [PATCH 0271/1291] ALSA: rawmidi: Change resized buffers atomically commit 39675f7a7c7e7702f7d5341f1e0d01db746543a0 upstream. The SNDRV_RAWMIDI_IOCTL_PARAMS ioctl may resize the buffers and the current code is racy. For example, the sequencer client may write to buffer while it being resized. As a simple workaround, let's switch to the resized buffer inside the stream runtime lock. Reported-by: syzbot+52f83f0ea8df16932f7f@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index f055ca10bbc1..abacbbc0b0e8 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card, int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; if (substream->append && substream->use_count > 1) @@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; runtime->avail = runtime->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; substream->active_sensing = !params->no_active_sensing; @@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params); int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; snd_rawmidi_drain_input(substream); @@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; return 0; -- GitLab From feefc072d10d646ee441289184b44d9db7ff2213 Mon Sep 17 00:00:00 2001 From: YOKOTA Hiroshi Date: Sun, 1 Jul 2018 18:30:01 +0900 Subject: [PATCH 0272/1291] ALSA: hda/realtek - Add Panasonic CF-SZ6 headset jack quirk commit 0fca97a29b83e3f315c14ed2372cfd0f9ee0a006 upstream. This adds some required quirk when uses headset or headphone on Panasonic CF-SZ6. Signed-off-by: YOKOTA Hiroshi Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf7737fc3b28..dcc9e6551b51 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6402,6 +6402,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), + SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), -- GitLab From a82d4478337a366dc2f95ac961a81260c6b4589e Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 16 Jul 2018 15:50:08 +0800 Subject: [PATCH 0273/1291] ALSA: hda: add mute led support for HP ProBook 455 G5 commit 9a6249d2a145226ec1b294116fcb08744cf7ab56 upstream. Audio mute led does not work on HP ProBook 455 G5, this can be fixed by using CXT_FIXUP_MUTE_LED_GPIO to support it. BugLink: https://bugs.launchpad.net/bugs/1781763 Reported-by: James Buren Signed-off-by: Po-Hsu Lin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index ba9a7e552183..88ce2f1022e1 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -965,6 +965,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), -- GitLab From edb5e3eeb14a5260e19010d0e3bb4cd0ebb3ca82 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 17 Jul 2018 15:21:56 -0700 Subject: [PATCH 0274/1291] ARCv2: [plat-hsdk]: Save accl reg pair by default commit af1fc5baa724c63ce1733dfcf855bad5ef6078e3 upstream. This manifsted as strace segfaulting on HSDK because gcc was targetting the accumulator registers as GPRs, which kernek was not saving/restoring by default. Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Kconfig | 2 +- arch/arc/plat-hsdk/Kconfig | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 4383313b064a..5c8caf85c350 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -408,7 +408,7 @@ config ARC_HAS_DIV_REM config ARC_HAS_ACCL_REGS bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6)" - default n + default y help Depending on the configuration, CPU can contain accumulator reg-pair (also referred to as r58:r59). These can also be used by gcc as GPR so diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 19ab3cf98f0f..fcc9a9e27e9c 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -7,5 +7,7 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" + depends on ISA_ARCV2 + select ARC_HAS_ACCL_REGS select CLK_HSDK select RESET_HSDK -- GitLab From 13e5197d7ef6662f68fd033630f47c2d62d7ccd6 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 28 Jun 2018 16:59:14 -0700 Subject: [PATCH 0275/1291] ARC: Fix CONFIG_SWAP commit 6e3761145a9ba3ce267c330b6bff51cf6a057b06 upstream. swap was broken on ARC due to silly copy-paste issue. We encode offset from swapcache page in __swp_entry() as (off << 13) but were not decoding back in __swp_offset() as (off >> 13) - it was still (off << 13). This finally fixes swap usage on ARC. | # mkswap /dev/sda2 | | # swapon -a -e /dev/sda2 | Adding 500728k swap on /dev/sda2. Priority:-2 extents:1 across:500728k | | # free | total used free shared buffers cached | Mem: 765104 13456 751648 4736 8 4736 | -/+ buffers/cache: 8712 756392 | Swap: 500728 0 500728 Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 08fe33830d4b..77676e18da69 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -379,7 +379,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Decode a PTE containing swap "identifier "into constituents */ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) /* NOPs, to keep generic kernel happy */ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -- GitLab From 3c732b3ab3a5fbfba43165140a8aa8d2d32592c7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 6 Jun 2018 15:59:38 +0300 Subject: [PATCH 0276/1291] ARC: configs: Remove CONFIG_INITRAMFS_SOURCE from defconfigs commit 64234961c145606b36eaa82c47b11be842b21049 upstream. We used to have pre-set CONFIG_INITRAMFS_SOURCE with local path to intramfs in ARC defconfigs. This was quite convenient for in-house development but not that convenient for newcomers who obviusly don't have folders like "arc_initramfs" next to the Linux source tree. Which leads to quite surprising failure of defconfig building: ------------------------------->8----------------------------- ../scripts/gen_initramfs_list.sh: Cannot open '../../arc_initramfs_hs/' ../usr/Makefile:57: recipe for target 'usr/initramfs_data.cpio.gz' failed make[2]: *** [usr/initramfs_data.cpio.gz] Error 1 ------------------------------->8----------------------------- So now when more and more people start to deal with our defconfigs let's make their life easier with removal of CONFIG_INITRAMFS_SOURCE. Signed-off-by: Alexey Brodkin Cc: Kevin Hilman Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - arch/arc/configs/haps_hs_defconfig | 1 - arch/arc/configs/haps_hs_smp_defconfig | 1 - arch/arc/configs/hsdk_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - 12 files changed, 12 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index ec7c849a5c8e..a8242362e551 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 63d3cf69e0b0..ef3c31cd7737 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index f613ecac14a7..1757ac9cecbc 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index db04ea4dd2d9..aa8240a92b60 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 3507be2af6fe..bc5a24ea6cf7 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..762b1fcd93dc 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 6dff83a238b8..b1a78222699c 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 31ee51b987e7..217d7ea3c956 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 8d3b1f67cae4..e733e4f1a320 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 6168ce2ac2ef..14377b8234f7 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index a70bdeb2b3fd..7e61c923a3cd 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index ef96406c446e..299fbe8003b2 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y -- GitLab From 95c58105bd974153505a3343537b009773e63b77 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Jul 2018 10:42:20 -0700 Subject: [PATCH 0277/1291] ARC: mm: allow mprotect to make stack mappings executable commit 93312b6da4df31e4102ce5420e6217135a16c7ea upstream. mprotect(EXEC) was failing for stack mappings as default vm flags was missing MAYEXEC. This was triggered by glibc test suite nptl/tst-execstack testcase What is surprising is that despite running LTP for years on, we didn't catch this issue as it lacks a directed test case. gcc dejagnu tests with nested functions also requiring exec stack work fine though because they rely on the GNU_STACK segment spit out by compiler and handled in kernel elf loader. This glibc case is different as the stack is non exec to begin with and a dlopen of shared lib with GNU_STACK segment triggers the exec stack proceedings using a mprotect(PROT_EXEC) which was broken. CC: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 109baa06831c..09ddddf71cc5 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -105,7 +105,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE) +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define WANT_PAGE_VIRTUAL 1 -- GitLab From be2657752e9eae45f21d69ffdd732f36e347cf62 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Fri, 20 Jul 2018 17:53:48 -0700 Subject: [PATCH 0278/1291] mm: memcg: fix use after free in mem_cgroup_iter() commit 9f15bde671355c351cf20d9f879004b234353100 upstream. It was reported that a kernel crash happened in mem_cgroup_iter(), which can be triggered if the legacy cgroup-v1 non-hierarchical mode is used. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f ...... Call trace: mem_cgroup_iter+0x2e0/0x6d4 shrink_zone+0x8c/0x324 balance_pgdat+0x450/0x640 kswapd+0x130/0x4b8 kthread+0xe8/0xfc ret_from_fork+0x10/0x20 mem_cgroup_iter(): ...... if (css_tryget(css)) <-- crash here break; ...... The crashing reason is that mem_cgroup_iter() uses the memcg object whose pointer is stored in iter->position, which has been freed before and filled with POISON_FREE(0x6b). And the root cause of the use-after-free issue is that invalidate_reclaim_iterators() fails to reset the value of iter->position to NULL when the css of the memcg is released in non- hierarchical mode. Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.com Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim") Signed-off-by: Jing Xia Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 942d9342b63b..db69d938e9ed 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -879,7 +879,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) int nid; int i; - while ((memcg = parent_mem_cgroup(memcg))) { + for (; memcg; memcg = parent_mem_cgroup(memcg)) { for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); for (i = 0; i <= DEF_PRIORITY; i++) { -- GitLab From 70ef1db1f21d5a9ee2ec997e57f9307cbbbbf5bb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 20 Jul 2018 17:53:45 -0700 Subject: [PATCH 0279/1291] mm/huge_memory.c: fix data loss when splitting a file pmd commit e1f1b1572e8db87a56609fd05bef76f98f0e456a upstream. __split_huge_pmd_locked() must check if the cleared huge pmd was dirty, and propagate that to PageDirty: otherwise, data may be lost when a huge tmpfs page is modified then split then reclaimed. How has this taken so long to be noticed? Because there was no problem when the huge page is written by a write system call (shmem_write_end() calls set_page_dirty()), nor when the page is allocated for a write fault (fault_dirty_shared_page() calls set_page_dirty()); but when allocated for a read fault (which MAP_POPULATE simulates), no set_page_dirty(). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1807111741430.1106@eggly.anvils Fixes: d21b9e57c74c ("thp: handle file pages in split_huge_pmd()") Signed-off-by: Hugh Dickins Reported-by: Ashwin Chaugule Reviewed-by: Yang Shi Reviewed-by: Kirill A. Shutemov Cc: "Huang, Ying" Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8af604f3b370..255469f78217 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2069,6 +2069,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (vma_is_dax(vma)) return; page = pmd_page(_pmd); + if (!PageDirty(page) && pmd_dirty(_pmd)) + set_page_dirty(page); if (!PageReferenced(page) && pmd_young(_pmd)) SetPageReferenced(page); page_remove_rmap(page, true); -- GitLab From dfc328156ddea2385394240c373a49bb42320b4c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Jul 2018 13:38:37 +0200 Subject: [PATCH 0280/1291] cpufreq: intel_pstate: Register when ACPI PCCH is present commit 95d6c0857e54b788982746071130d822a795026b upstream. Currently, intel_pstate doesn't register if _PSS is not present on HP Proliant systems, because it expects the firmware to take over CPU performance scaling in that case. However, if ACPI PCCH is present, the firmware expects the kernel to use it for CPU performance scaling and the pcc-cpufreq driver is loaded for that. Unfortunately, the firmware interface used by that driver is not scalable for fundamental reasons, so pcc-cpufreq is way suboptimal on systems with more than just a few CPUs. In fact, it is better to avoid using it at all. For this reason, modify intel_pstate to look for ACPI PCCH if _PSS is not present and register if it is there. Also prevent the pcc-cpufreq driver from trying to initialize itself if intel_pstate has been registered already. Fixes: fbbcdc0744da (intel_pstate: skip the driver if ACPI has power mgmt option) Reported-by: Andreas Herrmann Reviewed-by: Andreas Herrmann Acked-by: Srinivas Pandruvada Tested-by: Andreas Herrmann Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 17 ++++++++++++++++- drivers/cpufreq/pcc-cpufreq.c | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a905bbb45667..114dfe67015b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2188,6 +2188,18 @@ static bool __init intel_pstate_no_acpi_pss(void) return true; } +static bool __init intel_pstate_no_acpi_pcch(void) +{ + acpi_status status; + acpi_handle handle; + + status = acpi_get_handle(NULL, "\\_SB", &handle); + if (ACPI_FAILURE(status)) + return true; + + return !acpi_has_method(handle, "PCCH"); +} + static bool __init intel_pstate_has_acpi_ppc(void) { int i; @@ -2247,7 +2259,10 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void) switch (plat_info[idx].data) { case PSS: - return intel_pstate_no_acpi_pss(); + if (!intel_pstate_no_acpi_pss()) + return false; + + return intel_pstate_no_acpi_pcch(); case PPC: return intel_pstate_has_acpi_ppc() && !force_load; } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 3f0ce2ae35ee..0c56c9759672 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -580,6 +580,10 @@ static int __init pcc_cpufreq_init(void) { int ret; + /* Skip initialization if another cpufreq driver is there. */ + if (cpufreq_get_current_driver()) + return 0; + if (acpi_disabled) return 0; -- GitLab From a5b8eae536723d3377d21fe60cb2f294332bb322 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jul 2018 12:39:00 -0500 Subject: [PATCH 0281/1291] vfio/pci: Fix potential Spectre v1 commit 0e714d27786ce1fb3efa9aac58abc096e68b1c2a upstream. info.index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/vfio/pci/vfio_pci.c:734 vfio_pci_ioctl() warn: potential spectre issue 'vdev->region' Fix this by sanitizing info.index before indirectly using it to index vdev->region Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f041b1a6cf66..695b9d1a1aae 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -746,6 +747,9 @@ static long vfio_pci_ioctl(void *device_data, if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; + info.index = array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + vdev->num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; -- GitLab From 9a2e4a01ded2c88e11ef6640443462862c990a9b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 17 Jul 2018 17:19:12 +1000 Subject: [PATCH 0282/1291] vfio/spapr: Use IOMMU pageshift rather than pagesize commit 1463edca6734d42ab4406fa2896e20b45478ea36 upstream. The size is always equal to 1 page so let's use this. Later on this will be used for other checks which use page shifts to check the granularity of access. This should cause no behavioral change. Cc: stable@vger.kernel.org # v4.12+ Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 63112c36ab2d..b751dd60e41a 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -457,13 +457,13 @@ static void tce_iommu_unuse_page(struct tce_container *container, } static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, - unsigned long tce, unsigned long size, + unsigned long tce, unsigned long shift, unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) { long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(container->mm, tce, size); + mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift); if (!mem) return -EINVAL; @@ -487,7 +487,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl), + ret = tce_iommu_prereg_ua_to_hpa(container, *pua, tbl->it_page_shift, &hpa, &mem); if (ret) pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", @@ -609,7 +609,7 @@ static long tce_iommu_build_v2(struct tce_container *container, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, - tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); + tce, tbl->it_page_shift, &hpa, &mem); if (ret) break; -- GitLab From d21fb63010c4c8ca6768c3744cfe9ed2c399c130 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 3 Jul 2018 15:02:14 -0700 Subject: [PATCH 0283/1291] stop_machine: Disable preemption when waking two stopper threads commit 9fb8d5dc4b649dd190e1af4ead670753e71bf907 upstream. When cpu_stop_queue_two_works() begins to wake the stopper threads, it does so without preemption disabled, which leads to the following race condition: The source CPU calls cpu_stop_queue_two_works(), with cpu1 as the source CPU, and cpu2 as the destination CPU. When adding the stopper threads to the wake queue used in this function, the source CPU stopper thread is added first, and the destination CPU stopper thread is added last. When wake_up_q() is invoked to wake the stopper threads, the threads are woken up in the order that they are queued in, so the source CPU's stopper thread is woken up first, and it preempts the thread running on the source CPU. The stopper thread will then execute on the source CPU, disable preemption, and begin executing multi_cpu_stop(), and wait for an ack from the destination CPU's stopper thread, with preemption still disabled. Since the worker thread that woke up the stopper thread on the source CPU is affine to the source CPU, and preemption is disabled on the source CPU, that thread will never run to dequeue the destination CPU's stopper thread from the wake queue, and thus, the destination CPU's stopper thread will never run, causing the source CPU's stopper thread to wait forever, and stall. Disable preemption when waking the stopper threads in cpu_stop_queue_two_works(). Fixes: 0b26351b910f ("stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock") Co-Developed-by: Prasad Sodagudi Signed-off-by: Prasad Sodagudi Co-Developed-by: Pavankumar Kondeti Signed-off-by: Pavankumar Kondeti Signed-off-by: Isaac J. Manjarres Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: matt@codeblueprint.co.uk Cc: bigeasy@linutronix.de Cc: gregkh@linuxfoundation.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1530655334-4601-1-git-send-email-isaacm@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- kernel/stop_machine.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 64c0291b579c..2f6fa95de2d8 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -270,7 +270,11 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, goto retry; } - wake_up_q(&wakeq); + if (!err) { + preempt_disable(); + wake_up_q(&wakeq); + preempt_enable(); + } return err; } -- GitLab From 387362c3ed80bd266ab8cfdf971311044adbd478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 Jun 2018 20:56:25 +0300 Subject: [PATCH 0284/1291] drm/i915: Fix hotplug irq ack on i965/g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 96a85cc517a9ee4ae5e8d7f5a36cba05023784eb upstream. Just like with PIPESTAT, the edge triggered IIR on i965/g4x also causes problems for hotplug interrupts. To make sure we don't get the IIR port interrupt bit stuck low with the ISR bit high we must force an edge in ISR. Unfortunately we can't borrow the PIPESTAT trick and toggle the enable bits in PORT_HOTPLUG_EN as that act itself generates hotplug interrupts. Instead we just have to loop until we've cleared PORT_HOTPLUG_STAT, or we just give up and WARN. v2: Don't frob with PORT_HOTPLUG_EN Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180614175625.1615-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 0ba7c51a6fd80a89236f6ceb52e63f8a7f62bfd3) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_irq.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b63893eeca73..20a471ad0ad2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1786,10 +1786,38 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv, static u32 i9xx_hpd_irq_ack(struct drm_i915_private *dev_priv) { - u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT); + u32 hotplug_status = 0, hotplug_status_mask; + int i; + + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + hotplug_status_mask = HOTPLUG_INT_STATUS_G4X | + DP_AUX_CHANNEL_MASK_INT_STATUS_G4X; + else + hotplug_status_mask = HOTPLUG_INT_STATUS_I915; - if (hotplug_status) + /* + * We absolutely have to clear all the pending interrupt + * bits in PORT_HOTPLUG_STAT. Otherwise the ISR port + * interrupt bit won't have an edge, and the i965/g4x + * edge triggered IIR will not notice that an interrupt + * is still pending. We can't use PORT_HOTPLUG_EN to + * guarantee the edge as the act of toggling the enable + * bits can itself generate a new hotplug interrupt :( + */ + for (i = 0; i < 10; i++) { + u32 tmp = I915_READ(PORT_HOTPLUG_STAT) & hotplug_status_mask; + + if (tmp == 0) + return hotplug_status; + + hotplug_status |= tmp; I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status); + } + + WARN_ONCE(1, + "PORT_HOTPLUG_STAT did not clear (0x%08x)\n", + I915_READ(PORT_HOTPLUG_STAT)); return hotplug_status; } -- GitLab From 22c4488c864922b19dfed41c16ebf9568f78743b Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 13 Jul 2018 13:06:32 -0400 Subject: [PATCH 0285/1291] drm/nouveau: Use drm_connector_list_iter_* for iterating connectors commit 22b76bbe089cd901f5260ecb9a3dc41f9edb97a0 upstream. Every codepath in nouveau that loops through the connector list currently does so using the old method, which is prone to race conditions from MST connectors being created and destroyed. This has been causing a multitude of problems, including memory corruption from trying to access connectors that have already been freed! Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 6 ++++-- drivers/gpu/drm/nouveau/nouveau_connector.c | 9 +++++++-- drivers/gpu/drm/nouveau/nouveau_connector.h | 14 ++++++++++---- drivers/gpu/drm/nouveau/nouveau_display.c | 10 ++++++++-- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index debbbf0fd4bd..408b955e5c39 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -267,6 +267,7 @@ nouveau_backlight_init(struct drm_device *dev) struct nouveau_drm *drm = nouveau_drm(dev); struct nvif_device *device = &drm->client.device; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; INIT_LIST_HEAD(&drm->bl_connectors); @@ -275,7 +276,8 @@ nouveau_backlight_init(struct drm_device *dev) return 0; } - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && connector->connector_type != DRM_MODE_CONNECTOR_eDP) continue; @@ -292,7 +294,7 @@ nouveau_backlight_init(struct drm_device *dev) break; } } - + drm_connector_list_iter_end(&conn_iter); return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index c902a851eb51..53d93d95701f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1208,14 +1208,19 @@ nouveau_connector_create(struct drm_device *dev, int index) struct nouveau_display *disp = nouveau_display(dev); struct nouveau_connector *nv_connector = NULL; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; int type, ret = 0; bool dummy; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { nv_connector = nouveau_connector(connector); - if (nv_connector->index == index) + if (nv_connector->index == index) { + drm_connector_list_iter_end(&conn_iter); return connector; + } } + drm_connector_list_iter_end(&conn_iter); nv_connector = kzalloc(sizeof(*nv_connector), GFP_KERNEL); if (!nv_connector) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index a4d1a059bd3d..a8cbb4b56fc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -65,14 +65,20 @@ nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc) { struct drm_device *dev = nv_crtc->base.dev; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + struct nouveau_connector *nv_connector = NULL; struct drm_crtc *crtc = to_drm_crtc(nv_crtc); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->encoder && connector->encoder->crtc == crtc) - return nouveau_connector(connector); + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (connector->encoder && connector->encoder->crtc == crtc) { + nv_connector = nouveau_connector(connector); + break; + } } + drm_connector_list_iter_end(&conn_iter); - return NULL; + return nv_connector; } struct drm_connector * diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 2e7785f49e6d..3ab765b83be7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -405,6 +405,7 @@ nouveau_display_init(struct drm_device *dev) struct nouveau_display *disp = nouveau_display(dev); struct nouveau_drm *drm = nouveau_drm(dev); struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; int ret; ret = disp->init(dev); @@ -412,10 +413,12 @@ nouveau_display_init(struct drm_device *dev) return ret; /* enable hotplug interrupts */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { struct nouveau_connector *conn = nouveau_connector(connector); nvif_notify_get(&conn->hpd); } + drm_connector_list_iter_end(&conn_iter); /* enable flip completion events */ nvif_notify_get(&drm->flip); @@ -428,6 +431,7 @@ nouveau_display_fini(struct drm_device *dev, bool suspend) struct nouveau_display *disp = nouveau_display(dev); struct nouveau_drm *drm = nouveau_drm(dev); struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; if (!suspend) { if (drm_drv_uses_atomic_modeset(dev)) @@ -440,10 +444,12 @@ nouveau_display_fini(struct drm_device *dev, bool suspend) nvif_notify_put(&drm->flip); /* disable hotplug interrupts */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { struct nouveau_connector *conn = nouveau_connector(connector); nvif_notify_put(&conn->hpd); } + drm_connector_list_iter_end(&conn_iter); drm_kms_helper_poll_disable(dev); disp->fini(dev); -- GitLab From dd1363ca743eebf1dd7ccb6f9010b9d6de3afafc Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 13 Jul 2018 13:06:33 -0400 Subject: [PATCH 0286/1291] drm/nouveau: Avoid looping through fake MST connectors commit 37afe55b4ae0600deafe7c0e0e658593c4754f1b upstream. When MST and atomic were introduced to nouveau, another structure that could contain a drm_connector embedded within it was introduced; struct nv50_mstc. This meant that we no longer would be able to simply loop through our connector list and assume that nouveau_connector() would return a proper pointer for each connector, since the assertion that all connectors coming from nouveau have a full nouveau_connector struct became invalid. Unfortunately, none of the actual code that looped through connectors ever got updated, which means that we've been causing invalid memory accesses for quite a while now. An example that was caught by KASAN: [ 201.038698] ================================================================== [ 201.038792] BUG: KASAN: slab-out-of-bounds in nvif_notify_get+0x190/0x1a0 [nouveau] [ 201.038797] Read of size 4 at addr ffff88076738c650 by task kworker/0:3/718 [ 201.038800] [ 201.038822] CPU: 0 PID: 718 Comm: kworker/0:3 Tainted: G O 4.18.0-rc4Lyude-Test+ #1 [ 201.038825] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET78W (1.51 ) 05/18/2018 [ 201.038882] Workqueue: events nouveau_display_hpd_work [nouveau] [ 201.038887] Call Trace: [ 201.038894] dump_stack+0xa4/0xfd [ 201.038900] print_address_description+0x71/0x239 [ 201.038929] ? nvif_notify_get+0x190/0x1a0 [nouveau] [ 201.038935] kasan_report.cold.6+0x242/0x2fe [ 201.038942] __asan_report_load4_noabort+0x19/0x20 [ 201.038970] nvif_notify_get+0x190/0x1a0 [nouveau] [ 201.038998] ? nvif_notify_put+0x1f0/0x1f0 [nouveau] [ 201.039003] ? kmsg_dump_rewind_nolock+0xe4/0xe4 [ 201.039049] nouveau_display_init.cold.12+0x34/0x39 [nouveau] [ 201.039089] ? nouveau_user_framebuffer_create+0x120/0x120 [nouveau] [ 201.039133] nouveau_display_resume+0x5c0/0x810 [nouveau] [ 201.039173] ? nvkm_client_ioctl+0x20/0x20 [nouveau] [ 201.039215] nouveau_do_resume+0x19f/0x570 [nouveau] [ 201.039256] nouveau_pmops_runtime_resume+0xd8/0x2a0 [nouveau] [ 201.039264] pci_pm_runtime_resume+0x130/0x250 [ 201.039269] ? pci_restore_standard_config+0x70/0x70 [ 201.039275] __rpm_callback+0x1f2/0x5d0 [ 201.039279] ? rpm_resume+0x560/0x18a0 [ 201.039283] ? pci_restore_standard_config+0x70/0x70 [ 201.039287] ? pci_restore_standard_config+0x70/0x70 [ 201.039291] ? pci_restore_standard_config+0x70/0x70 [ 201.039296] rpm_callback+0x175/0x210 [ 201.039300] ? pci_restore_standard_config+0x70/0x70 [ 201.039305] rpm_resume+0xcc3/0x18a0 [ 201.039312] ? rpm_callback+0x210/0x210 [ 201.039317] ? __pm_runtime_resume+0x9e/0x100 [ 201.039322] ? kasan_check_write+0x14/0x20 [ 201.039326] ? do_raw_spin_lock+0xc2/0x1c0 [ 201.039333] __pm_runtime_resume+0xac/0x100 [ 201.039374] nouveau_display_hpd_work+0x67/0x1f0 [nouveau] [ 201.039380] process_one_work+0x7a0/0x14d0 [ 201.039388] ? cancel_delayed_work_sync+0x20/0x20 [ 201.039392] ? lock_acquire+0x113/0x310 [ 201.039398] ? kasan_check_write+0x14/0x20 [ 201.039402] ? do_raw_spin_lock+0xc2/0x1c0 [ 201.039409] worker_thread+0x86/0xb50 [ 201.039418] kthread+0x2e9/0x3a0 [ 201.039422] ? process_one_work+0x14d0/0x14d0 [ 201.039426] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 201.039431] ret_from_fork+0x3a/0x50 [ 201.039441] [ 201.039444] Allocated by task 79: [ 201.039449] save_stack+0x43/0xd0 [ 201.039452] kasan_kmalloc+0xc4/0xe0 [ 201.039456] kmem_cache_alloc_trace+0x10a/0x260 [ 201.039494] nv50_mstm_add_connector+0x9a/0x340 [nouveau] [ 201.039504] drm_dp_add_port+0xff5/0x1fc0 [drm_kms_helper] [ 201.039511] drm_dp_send_link_address+0x4a7/0x740 [drm_kms_helper] [ 201.039518] drm_dp_check_and_send_link_address+0x1a7/0x210 [drm_kms_helper] [ 201.039525] drm_dp_mst_link_probe_work+0x71/0xb0 [drm_kms_helper] [ 201.039529] process_one_work+0x7a0/0x14d0 [ 201.039533] worker_thread+0x86/0xb50 [ 201.039537] kthread+0x2e9/0x3a0 [ 201.039541] ret_from_fork+0x3a/0x50 [ 201.039543] [ 201.039546] Freed by task 0: [ 201.039549] (stack is not available) [ 201.039551] [ 201.039555] The buggy address belongs to the object at ffff88076738c1a8 which belongs to the cache kmalloc-2048 of size 2048 [ 201.039559] The buggy address is located 1192 bytes inside of 2048-byte region [ffff88076738c1a8, ffff88076738c9a8) [ 201.039563] The buggy address belongs to the page: [ 201.039567] page:ffffea001d9ce200 count:1 mapcount:0 mapping:ffff88084000d0c0 index:0x0 compound_mapcount: 0 [ 201.039573] flags: 0x8000000000008100(slab|head) [ 201.039578] raw: 8000000000008100 ffffea001da3be08 ffffea001da25a08 ffff88084000d0c0 [ 201.039582] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000 [ 201.039585] page dumped because: kasan: bad access detected [ 201.039588] [ 201.039591] Memory state around the buggy address: [ 201.039594] ffff88076738c500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 201.039598] ffff88076738c580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 201.039601] >ffff88076738c600: 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc fc [ 201.039604] ^ [ 201.039607] ffff88076738c680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 201.039611] ffff88076738c700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 201.039613] ================================================================== Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_connector.c | 2 +- drivers/gpu/drm/nouveau/nouveau_connector.h | 24 ++++++++++++++++++++- drivers/gpu/drm/nouveau/nouveau_display.c | 4 ++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 53d93d95701f..430830d63a33 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1213,7 +1213,7 @@ nouveau_connector_create(struct drm_device *dev, int index) bool dummy; drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { + nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { nv_connector = nouveau_connector(connector); if (nv_connector->index == index) { drm_connector_list_iter_end(&conn_iter); diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index a8cbb4b56fc7..dc7454e7f19a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -33,6 +33,7 @@ #include #include #include "nouveau_crtc.h" +#include "nouveau_encoder.h" struct nvkm_i2c_port; @@ -60,6 +61,27 @@ static inline struct nouveau_connector *nouveau_connector( return container_of(con, struct nouveau_connector, base); } +static inline bool +nouveau_connector_is_mst(struct drm_connector *connector) +{ + const struct nouveau_encoder *nv_encoder; + const struct drm_encoder *encoder; + + if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) + return false; + + nv_encoder = find_encoder(connector, DCB_OUTPUT_ANY); + if (!nv_encoder) + return false; + + encoder = &nv_encoder->base.base; + return encoder->encoder_type == DRM_MODE_ENCODER_DPMST; +} + +#define nouveau_for_each_non_mst_connector_iter(connector, iter) \ + drm_for_each_connector_iter(connector, iter) \ + for_each_if(!nouveau_connector_is_mst(connector)) + static inline struct nouveau_connector * nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc) { @@ -70,7 +92,7 @@ nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc) struct drm_crtc *crtc = to_drm_crtc(nv_crtc); drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { + nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { if (connector->encoder && connector->encoder->crtc == crtc) { nv_connector = nouveau_connector(connector); break; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 3ab765b83be7..caf53503c0f7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -414,7 +414,7 @@ nouveau_display_init(struct drm_device *dev) /* enable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { + nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { struct nouveau_connector *conn = nouveau_connector(connector); nvif_notify_get(&conn->hpd); } @@ -445,7 +445,7 @@ nouveau_display_fini(struct drm_device *dev, bool suspend) /* disable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { + nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { struct nouveau_connector *conn = nouveau_connector(connector); nvif_notify_put(&conn->hpd); } -- GitLab From c8347d91cfd7134e4fb242ac0a16511b589d7a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 2 Jul 2018 22:52:20 +0200 Subject: [PATCH 0287/1291] gen_stats: Fix netlink stats dumping in the presence of padding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d5a672ac9f48f81b20b1cad1d9ed7bbf4e418d4c ] The gen_stats facility will add a header for the toplevel nlattr of type TCA_STATS2 that contains all stats added by qdisc callbacks. A reference to this header is stored in the gnet_dump struct, and when all the per-qdisc callbacks have finished adding their stats, the length of the containing header will be adjusted to the right value. However, on architectures that need padding (i.e., that don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS), the padding nlattr is added before the stats, which means that the stored pointer will point to the padding, and so when the header is fixed up, the result is just a very big padding nlattr. Because most qdiscs also supply the legacy TCA_STATS struct, this problem has been mostly invisible, but we exposed it with the netlink attribute-based statistics in CAKE. Fix the issue by fixing up the stored pointer if it points to a padding nlattr. Tested-by: Pete Heist Tested-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gen_stats.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 87f28557b329..441c04adedba 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, d->lock = lock; spin_lock_bh(lock); } - if (d->tail) - return gnet_stats_copy(d, type, NULL, 0, padattr); + if (d->tail) { + int ret = gnet_stats_copy(d, type, NULL, 0, padattr); + + /* The initial attribute added in gnet_stats_copy() may be + * preceded by a padding attribute, in which case d->tail will + * end up pointing at the padding instead of the real attribute. + * Fix this so gnet_stats_finish_copy() adjusts the length of + * the right attribute. + */ + if (ret == 0 && d->tail->nla_type == padattr) + d->tail = (struct nlattr *)((char *)d->tail + + NLA_ALIGN(d->tail->nla_len)); + return ret; + } return 0; } -- GitLab From 0348dcd98af3b9bf3ddf4785e74b335bdc65bc4d Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 5 Jul 2018 18:49:23 +0000 Subject: [PATCH 0288/1291] ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns [ Upstream commit 70ba5b6db96ff7324b8cfc87e0d0383cf59c9677 ] The low and high values of the net.ipv4.ping_group_range sysctl were being silently forced to the default disabled state when a write to the sysctl contained GIDs that didn't map to the associated user namespace. Confusingly, the sysctl's write operation would return success and then a subsequent read of the sysctl would indicate that the low and high values are the overflowgid. This patch changes the behavior by clearly returning an error when the sysctl write operation receives a GID range that doesn't map to the associated user namespace. In such a situation, the previous value of the sysctl is preserved and that range will be returned in a subsequent read of the sysctl. Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5a29dc5083a3..d82e8344fc54 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -186,8 +186,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); high = make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high) || - (urange[1] < urange[0]) || gid_lt(high, low)) { + if (!gid_valid(low) || !gid_valid(high)) + return -EINVAL; + if (urange[1] < urange[0] || gid_lt(high, low)) { low = make_kgid(&init_user_ns, 1); high = make_kgid(&init_user_ns, 0); } -- GitLab From a5d33d38bd37a16b4af224d35f4abfc686bfab01 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Jul 2018 17:12:39 +0100 Subject: [PATCH 0289/1291] ipv6: fix useless rol32 call on hash [ Upstream commit 169dc027fb02492ea37a0575db6a658cf922b854 ] The rol32 call is currently rotating hash but the rol'd value is being discarded. I believe the current code is incorrect and hash should be assigned the rotated value returned from rol32. Thanks to David Lebrun for spotting this. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a54b8c58ccb7..e59f385da38e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -795,7 +795,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, * to minimize possbility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. */ - rol32(hash, 16); + hash = rol32(hash, 16); flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; -- GitLab From 9f7276ce825bb1324dca2515d7b5e5d320af77f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jul 2018 10:48:56 +0200 Subject: [PATCH 0290/1291] ipv6: ila: select CONFIG_DST_CACHE [ Upstream commit 83ed7d1fe2d2d4a11b30660dec20168bb473d9c1 ] My randconfig builds came across an old missing dependency for ILA: ERROR: "dst_cache_set_ip6" [net/ipv6/ila/ila.ko] undefined! ERROR: "dst_cache_get" [net/ipv6/ila/ila.ko] undefined! ERROR: "dst_cache_init" [net/ipv6/ila/ila.ko] undefined! ERROR: "dst_cache_destroy" [net/ipv6/ila/ila.ko] undefined! We almost never run into this by accident because randconfig builds end up selecting DST_CACHE from some other tunnel protocol, and this one appears to be the only one missing the explicit 'select'. >From all I can tell, this problem first appeared in linux-4.9 when dst_cache support got added to ILA. Fixes: 79ff2fc31e0f ("ila: Cache a route to translated address") Cc: Tom Herbert Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ea71e4b0ab7a..2d36fd097299 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -109,6 +109,7 @@ config IPV6_MIP6 config IPV6_ILA tristate "IPv6: Identifier Locator Addressing (ILA)" depends on NETFILTER + select DST_CACHE select LWTUNNEL ---help--- Support for IPv6 Identifier Locator Addressing (ILA). -- GitLab From cfb876dc30429879584dde6ced2b18f8d0757b3e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jul 2018 13:26:13 -0700 Subject: [PATCH 0291/1291] lib/rhashtable: consider param->min_size when setting initial table size [ Upstream commit 107d01f5ba10f4162c38109496607eb197059064 ] rhashtable_init() currently does not take into account the user-passed min_size parameter unless param->nelem_hint is set as well. As such, the default size (number of buckets) will always be HASH_DEFAULT_SIZE even if the smallest allowed size is larger than that. Remediate this by unconditionally calling into rounded_hashtable_size() and handling things accordingly. Signed-off-by: Davidlohr Bueso Acked-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b734ce731a7a..39215c724fc7 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -878,8 +878,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { - return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - (unsigned long)params->min_size); + size_t retsize; + + if (params->nelem_hint) + retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + (unsigned long)params->min_size); + else + retsize = max(HASH_DEFAULT_SIZE, + (unsigned long)params->min_size); + + return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) @@ -936,8 +944,6 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *tbl; size_t size; - size = HASH_DEFAULT_SIZE; - if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -964,8 +970,7 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); - if (params->nelem_hint) - size = rounded_hashtable_size(&ht->p); + size = rounded_hashtable_size(&ht->p); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); -- GitLab From cc0ab64759c89a640e8d57c106e85e6816380337 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Sat, 7 Jul 2018 16:31:40 +0900 Subject: [PATCH 0292/1291] net: diag: Don't double-free TCP_NEW_SYN_RECV sockets in tcp_abort [ Upstream commit acc2cf4e37174646a24cba42fa53c668b2338d4e ] When tcp_diag_destroy closes a TCP_NEW_SYN_RECV socket, it first frees it by calling inet_csk_reqsk_queue_drop_and_and_put in tcp_abort, and then frees it again by calling sock_gen_put. Since tcp_abort only has one caller, and all the other codepaths in tcp_abort don't free the socket, just remove the free in that function. Cc: David Ahern Tested: passes Android sock_diag_test.py, which exercises this codepath Fixes: d7226c7a4dd1 ("net: diag: Fix refcnt leak in error path destroying socket") Signed-off-by: Lorenzo Colitti Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e3ece12f0250..e81ff9d545a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3415,8 +3415,7 @@ int tcp_abort(struct sock *sk, int err) struct request_sock *req = inet_reqsk(sk); local_bh_disable(); - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, - req); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); local_bh_enable(); return 0; } -- GitLab From 6403b54a4f7e097842d7814fe20124b32e5d3e1d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 11 Jul 2018 14:39:42 +0200 Subject: [PATCH 0293/1291] net: Don't copy pfmemalloc flag in __copy_skb_header() [ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ] The pfmemalloc flag indicates that the skb was allocated from the PFMEMALLOC reserves, and the flag is currently copied on skb copy and clone. However, an skb copied from an skb flagged with pfmemalloc wasn't necessarily allocated from PFMEMALLOC reserves, and on the other hand an skb allocated that way might be copied from an skb that wasn't. So we should not copy the flag on skb copy, and rather decide whether to allow an skb to be associated with sockets unrelated to page reclaim depending only on how it was allocated. Move the pfmemalloc flag before headers_start[0] using an existing 1-bit hole, so that __copy_skb_header() doesn't copy it. When cloning, we'll now take care of this flag explicitly, contravening to the warning comment of __skb_clone(). While at it, restore the newline usage introduced by commit b19372273164 ("net: reorganize sk_buff for faster __copy_skb_header()") to visually separate bytes used in bitfields after headers_start[0], that was gone after commit a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area"), and describe the pfmemalloc flag in the kernel-doc structure comment. This doesn't change the size of sk_buff or cacheline boundaries, but consolidates the 15 bits hole before tc_index into a 2 bytes hole before csum, that could now be filled more easily. Reported-by: Patrick Talbert Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 10 +++++----- net/core/skbuff.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index be45224b01d7..9cf971c68401 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -624,6 +624,7 @@ typedef unsigned char *sk_buff_data_t; * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -722,7 +723,7 @@ struct sk_buff { peeked:1, head_frag:1, xmit_more:1, - __unused:1; /* one bit hole */ + pfmemalloc:1; /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() @@ -741,31 +742,30 @@ struct sk_buff { __u8 __pkt_type_offset[0]; __u8 pkt_type:3; - __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nf_trace:1; __u8 ip_summed:2; __u8 ooo_okay:1; + __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - __u8 no_fcs:1; /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; + __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; - __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif __u8 ipvs_property:1; + __u8 inner_protocol_type:1; __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c132eca9e383..159518a80a46 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -858,6 +858,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; + if (skb->pfmemalloc) + n->pfmemalloc = 1; n->destructor = NULL; C(tail); C(end); -- GitLab From 829f4fd66354560fa66c83a3d199453036bef7ed Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 13 Jul 2018 13:21:07 +0200 Subject: [PATCH 0294/1291] skbuff: Unconditionally copy pfmemalloc in __skb_clone() [ Upstream commit e78bfb0751d4e312699106ba7efbed2bab1a53ca ] Commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") introduced a different handling for the pfmemalloc flag in copy and clone paths. In __skb_clone(), now, the flag is set only if it was set in the original skb, but not cleared if it wasn't. This is wrong and might lead to socket buffers being flagged with pfmemalloc even if the skb data wasn't allocated from pfmemalloc reserves. Copy the flag instead of ORing it. Reported-by: Sabrina Dubroca Fixes: 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") Signed-off-by: Stefano Brivio Tested-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 159518a80a46..23041b5c0b27 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -858,8 +858,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; - if (skb->pfmemalloc) - n->pfmemalloc = 1; + C(pfmemalloc); n->destructor = NULL; C(tail); C(end); -- GitLab From 78382d78bb4a99ba312bfeef8b23006eb9f2e368 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 7 Jul 2018 16:15:26 -0700 Subject: [PATCH 0295/1291] net/ipv4: Set oif in fib_compute_spec_dst [ Upstream commit e7372197e15856ec4ee66b668020a662994db103 ] Xin reported that icmp replies may not use the address on the device the echo request is received if the destination address is broadcast. Instead a route lookup is done without considering VRF context. Fix by setting oif in flow struct to the master device if it is enslaved. That directs the lookup to the VRF table. If the device is not enslaved, oif is still 0 so no affect. Fixes: cd2fbe1b6b51 ("net: Use VRF device index for lookups on RX") Reported-by: Xin Long Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index df8fd3ce713d..67eebcb113f3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -290,6 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, -- GitLab From 811ad4b366a60a7e05b20c3c8262bb95e69d4ade Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 3 Jul 2018 22:34:54 +0200 Subject: [PATCH 0296/1291] net: phy: fix flag masking in __set_phy_supported [ Upstream commit df8ed346d4a806a6eef2db5924285e839604b3f9 ] Currently also the pause flags are removed from phydev->supported because they're not included in PHY_DEFAULT_FEATURES. I don't think this is intended, especially when considering that this function can be called via phy_set_max_speed() anywhere in a driver. Change the masking to mask out only the values we're going to change. In addition remove the misleading comment, job of this small function is just to adjust the supported and advertised speeds. Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a1e7ea4d4b16..a174d05a9752 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1686,11 +1686,8 @@ EXPORT_SYMBOL(genphy_loopback); static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - /* The default values for phydev->supported are provided by the PHY - * driver "features" member, we want to reset to sane defaults first - * before supporting higher speeds. - */ - phydev->supported &= PHY_DEFAULT_FEATURES; + phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | + PHY_10BT_FEATURES); switch (max_speed) { default: -- GitLab From 294dc77bb75e358c9e8350fd6217305c680bf1b3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jul 2018 20:17:33 -0500 Subject: [PATCH 0297/1291] ptp: fix missing break in switch [ Upstream commit 9ba8376ce1e2cbf4ce44f7e4bee1d0648e10d594 ] It seems that a *break* is missing in order to avoid falling through to the default case. Otherwise, checking *chan* makes no sense. Fixes: 72df7a7244c0 ("ptp: Allow reassigning calibration pin function") Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 58a97d420572..51364621f77c 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -89,6 +89,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, case PTP_PF_PHYSYNC: if (chan != 0) return -EINVAL; + break; default: return -EINVAL; } -- GitLab From c84c7d8383c29effa009a03f05555ded0dce9bf3 Mon Sep 17 00:00:00 2001 From: Matevz Vucnik Date: Wed, 4 Jul 2018 18:12:48 +0200 Subject: [PATCH 0298/1291] qmi_wwan: add support for Quectel EG91 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 38cd58ed9c4e389799b507bcffe02a7a7a180b33 ] This adds the USB id of LTE modem Quectel EG91. It requires the same quirk as other Quectel modems to make it work. Signed-off-by: Matevz Vucnik Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 0db500bf86d9..6d3811c869fd 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1252,6 +1252,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ -- GitLab From c7daaa272ddab653e2f2144e9dd497ded53a5bf7 Mon Sep 17 00:00:00 2001 From: Sanjeev Bansal Date: Mon, 16 Jul 2018 11:13:32 +0530 Subject: [PATCH 0299/1291] tg3: Add higher cpu clock for 5762. [ Upstream commit 3a498606bb04af603a46ebde8296040b2de350d1 ] This patch has fix for TX timeout while running bi-directional traffic with 100 Mbps using 5762. Signed-off-by: Sanjeev Bansal Signed-off-by: Siva Reddy Kallam Reviewed-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 9a8ef630466f..1b1d2a67f412 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -9279,6 +9279,15 @@ static int tg3_chip_reset(struct tg3 *tp) tg3_restore_clk(tp); + /* Increase the core clock speed to fix tx timeout issue for 5762 + * with 100Mbps link speed. + */ + if (tg3_asic_rev(tp) == ASIC_REV_5762) { + val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE); + tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val | + TG3_CPMU_MAC_ORIDE_ENABLE); + } + /* Reprobe ASF enable state. */ tg3_flag_clear(tp, ENABLE_ASF); tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | -- GitLab From 7e7fefde7dec77475457453f3e7e28acf3be1171 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 17 Jul 2018 17:11:13 +0000 Subject: [PATCH 0300/1291] hv_netvsc: Fix napi reschedule while receive completion is busy [ Upstream commit 6b81b193b83e87da1ea13217d684b54fccf8ee8a ] If out ring is full temporarily and receive completion cannot go out, we may still need to reschedule napi if certain conditions are met. Otherwise the napi poll might be stopped forever, and cause network disconnect. Fixes: 7426b1a51803 ("netvsc: optimize receive completions") Signed-off-by: Stephen Hemminger Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 701be5d81062..806239b89990 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1250,6 +1250,7 @@ int netvsc_poll(struct napi_struct *napi, int budget) struct hv_device *device = netvsc_channel_to_device(channel); struct net_device *ndev = hv_get_drvdata(device); int work_done = 0; + int ret; /* If starting a new interval */ if (!nvchan->desc) @@ -1261,16 +1262,18 @@ int netvsc_poll(struct napi_struct *napi, int budget) nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } - /* If send of pending receive completions suceeded - * and did not exhaust NAPI budget this time - * and not doing busy poll + /* Send any pending receive completions */ + ret = send_recv_completions(ndev, net_device, nvchan); + + /* If it did not exhaust NAPI budget this time + * and not doing busy poll * then re-enable host interrupts - * and reschedule if ring is not empty. + * and reschedule if ring is not empty + * or sending receive completion failed. */ - if (send_recv_completions(ndev, net_device, nvchan) == 0 && - work_done < budget && + if (work_done < budget && napi_complete_done(napi, work_done) && - hv_end_read(&channel->inbound) && + (ret || hv_end_read(&channel->inbound)) && napi_schedule_prep(napi)) { hv_begin_read(&channel->inbound); __napi_schedule(napi); -- GitLab From 65851c6b6d6ef51cfd37bd19e00a8045a58812ef Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 15 Jul 2018 13:54:39 +0300 Subject: [PATCH 0301/1291] net/mlx4_en: Don't reuse RX page when XDP is set [ Upstream commit 432e629e56432064761be63bcd5e263c0920430d ] When a new rx packet arrives, the rx path will decide whether to reuse the remainder of the page or not according to one of the below conditions: 1. frag_info->frag_stride == PAGE_SIZE / 2 2. frags->page_offset + frag_info->frag_size > PAGE_SIZE; The first condition is no met for when XDP is set. For XDP, page_offset is always set to priv->rx_headroom which is XDP_PACKET_HEADROOM and frag_info->frag_size is around mtu size + some padding, still the 2nd release condition will hold since XDP_PACKET_HEADROOM + 1536 < PAGE_SIZE, as a result the page will not be released and will be _wrongly_ reused for next free rx descriptor. In XDP there is an assumption to have a page per packet and reuse can break such assumption and might cause packet data corruptions. Fix this by adding an extra condition (!priv->rx_headroom) to the 2nd case to avoid page reuse when XDP is set, since rx_headroom is set to 0 for non XDP setup and set to XDP_PACKET_HEADROOM for XDP setup. No additional cache line is required for the new condition. Fixes: 34db548bfb95 ("mlx4: add page recycling in receive path") Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Suggested-by: Martin KaFai Lau CC: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b97a55c827eb..ab2a9dbb46c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -472,10 +472,10 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, { const struct mlx4_en_frag_info *frag_info = priv->frag_info; unsigned int truesize = 0; + bool release = true; int nr, frag_size; struct page *page; dma_addr_t dma; - bool release; /* Collect used fragments while replacing them in the HW descriptors */ for (nr = 0;; frags++) { @@ -498,7 +498,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, release = page_count(page) != 1 || page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); - } else { + } else if (!priv->rx_headroom) { + /* rx_headroom for non XDP setup is always 0. + * When XDP is set, the above condition will + * guarantee page is always released. + */ u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); frags->page_offset += sz_align; -- GitLab From 7c14cf21867cee5cb8f6b830d08769f01c1b69a7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Jul 2018 02:47:58 -0700 Subject: [PATCH 0302/1291] net: systemport: Fix CRC forwarding check for SYSTEMPORT Lite [ Upstream commit 9e3bff923913729d76d87f0015848ee7b8ff7083 ] SYSTEMPORT Lite reversed the logic compared to SYSTEMPORT, the GIB_FCS_STRIP bit is set when the Ethernet FCS is stripped, and that bit is not set by default. Fix the logic such that we properly check whether that bit is set or not and we don't forward an extra 4 bytes to the network stack. Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- drivers/net/ethernet/broadcom/bcmsysport.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 1e856e8b9a92..0fff2432ab4c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1851,8 +1851,8 @@ static int bcm_sysport_open(struct net_device *dev) if (!priv->is_lite) priv->crc_fwd = !!(umac_readl(priv, UMAC_CMD) & CMD_CRC_FWD); else - priv->crc_fwd = !!(gib_readl(priv, GIB_CONTROL) & - GIB_FCS_STRIP); + priv->crc_fwd = !((gib_readl(priv, GIB_CONTROL) & + GIB_FCS_STRIP) >> GIB_FCS_STRIP_SHIFT); phydev = of_phy_connect(dev, priv->phy_dn, bcm_sysport_adj_link, 0, priv->phy_interface); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index a2006f5fc26f..86ae751ccb5c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -277,7 +277,8 @@ struct bcm_rsb { #define GIB_GTX_CLK_EXT_CLK (0 << GIB_GTX_CLK_SEL_SHIFT) #define GIB_GTX_CLK_125MHZ (1 << GIB_GTX_CLK_SEL_SHIFT) #define GIB_GTX_CLK_250MHZ (2 << GIB_GTX_CLK_SEL_SHIFT) -#define GIB_FCS_STRIP (1 << 6) +#define GIB_FCS_STRIP_SHIFT 6 +#define GIB_FCS_STRIP (1 << GIB_FCS_STRIP_SHIFT) #define GIB_LCL_LOOP_EN (1 << 7) #define GIB_LCL_LOOP_TXEN (1 << 8) #define GIB_RMT_LOOP_EN (1 << 9) -- GitLab From bbf9b1a46420191fe147a3d83bdce71fe594fea3 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 13 Jul 2018 17:21:42 +0200 Subject: [PATCH 0303/1291] ipv6: make DAD fail with enhanced DAD when nonce length differs [ Upstream commit e66515999b627368892ccc9b3a13a506f2ea1357 ] Commit adc176c54722 ("ipv6 addrconf: Implemented enhanced DAD (RFC7527)") added enhanced DAD with a nonce length of 6 bytes. However, RFC7527 doesn't specify the length of the nonce, other than being 6 + 8*k bytes, with integer k >= 0 (RFC3971 5.3.2). The current implementation simply assumes that the nonce will always be 6 bytes, but others systems are free to choose different sizes. If another system sends a nonce of different length but with the same 6 bytes prefix, it shouldn't be considered as the same nonce. Thus, check that the length of the received nonce is the same as the length we sent. Ugly scapy test script running on veth0: def loop(): pkt=sniff(iface="veth0", filter="icmp6", count=1) pkt = pkt[0] b = bytearray(pkt[Raw].load) b[1] += 1 b += b'\xde\xad\xbe\xef\xde\xad\xbe\xef' pkt[Raw].load = bytes(b) pkt[IPv6].plen += 8 # fixup checksum after modifying the payload pkt[IPv6].payload.cksum -= 0x3b44 if pkt[IPv6].payload.cksum < 0: pkt[IPv6].payload.cksum += 0xffff sendp(pkt, iface="veth0") This should result in DAD failure for any address added to veth0's peer, but is currently ignored. Fixes: adc176c54722 ("ipv6 addrconf: Implemented enhanced DAD (RFC7527)") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d081db125905..528218460bc5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -803,7 +803,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } } - if (ndopts.nd_opts_nonce) + if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1) memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); -- GitLab From 78cdeb665fb1f4cdc8fdbb95c584f916ba11f0f1 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Tue, 17 Jul 2018 13:17:09 +0200 Subject: [PATCH 0304/1291] net: usb: asix: replace mii_nway_restart in resume path [ Upstream commit 5c968f48021a9b3faa61ac2543cfab32461c0e05 ] mii_nway_restart is not pm aware which results in a rtnl deadlock. Implement mii_nway_restart manual by setting BMCR_ANRESTART if BMCR_ANENABLE is set. To reproduce: * plug an asix based usb network interface * wait until the device enters PM (~5 sec) * `ip link set eth1 up` will never return Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter") Signed-off-by: Alexander Couzens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix_devices.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 3d4f7959dabb..b1b3d8f7e67d 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -642,10 +642,12 @@ static void ax88772_restore_phy(struct usbnet *dev) priv->presvd_phy_advertise); /* Restore BMCR */ + if (priv->presvd_phy_bmcr & BMCR_ANENABLE) + priv->presvd_phy_bmcr |= BMCR_ANRESTART; + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR, priv->presvd_phy_bmcr); - mii_nway_restart(&dev->mii); priv->presvd_phy_advertise = 0; priv->presvd_phy_bmcr = 0; } -- GitLab From b12c7d0847e224301fb2323b5a85d866ead87199 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2018 15:07:11 +0100 Subject: [PATCH 0305/1291] alpha: fix osf_wait4() breakage commit f88a333b44318643282b8acc92af90deda441f5e upstream. kernel_wait4() expects a userland address for status - it's only rusage that goes as a kernel one (and needs a copyout afterwards) [ Also, fix the prototype of kernel_wait4() to have that __user annotation - Linus ] Fixes: 92ebce5ac55d ("osf_wait4: switch to kernel_wait4()") Cc: stable@kernel.org # v4.13+ Signed-off-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 5 +---- include/linux/sched/task.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 75a5c35a2067..a48976dc9bcd 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1183,13 +1183,10 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, struct rusage32 __user *, ur) { - unsigned int status = 0; struct rusage r; - long err = kernel_wait4(pid, &status, options, &r); + long err = kernel_wait4(pid, ustatus, options, &r); if (err <= 0) return err; - if (put_user(status, ustatus)) - return -EFAULT; if (!ur) return err; if (put_tv32(&ur->ru_utime, &r.ru_utime)) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 05b8650f06f5..a74ec619ac51 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,7 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern long kernel_wait4(pid_t, int *, int, struct rusage *); +extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); extern void free_task(struct task_struct *tsk); -- GitLab From ea8a50e5f829ad3e855f45b29e96612a92bb1418 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2018 09:43:13 -0400 Subject: [PATCH 0306/1291] cxl_getfile(): fix double-iput() on alloc_file() failures commit d202797f480c0e5918e7642d6716cdc62b3ab5c9 upstream. Doing iput() after path_put() is wrong. Cc: stable@vger.kernel.org Acked-by: Linus Torvalds Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index a0c44d16bf30..c75daba57fd7 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -102,15 +102,15 @@ static struct file *cxl_getfile(const char *name, d_instantiate(path.dentry, inode); file = alloc_file(&path, OPEN_FMODE(flags), fops); - if (IS_ERR(file)) - goto err_dput; + if (IS_ERR(file)) { + path_put(&path); + goto err_fs; + } file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; -err_dput: - path_put(&path); err_inode: iput(inode); err_fs: -- GitLab From ff42682f21d7084e0808171a973b0bcecd14ec14 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 18 Jul 2018 14:03:16 +0530 Subject: [PATCH 0307/1291] powerpc/powernv: Fix save/restore of SPRG3 on entry/exit from stop (idle) commit b03897cf318dfc47de33a7ecbc7655584266f034 upstream. On 64-bit servers, SPRN_SPRG3 and its userspace read-only mirror SPRN_USPRG3 are used as userspace VDSO write and read registers respectively. SPRN_SPRG3 is lost when we enter stop4 and above, and is currently not restored. As a result, any read from SPRN_USPRG3 returns zero on an exit from stop4 (Power9 only) and above. Thus in this situation, on POWER9, any call from sched_getcpu() always returns zero, as on powerpc, we call __kernel_getcpu() which relies upon SPRN_USPRG3 to report the CPU and NUMA node information. Fix this by restoring SPRN_SPRG3 on wake up from a deep stop state with the sprg_vdso value that is cached in PACA. Fixes: e1c1cfed5432 ("powerpc/powernv: Save/Restore additional SPRs for stop4 cpuidle") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Florian Weimer Signed-off-by: Gautham R. Shenoy Reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/idle_book3s.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index e35cebd45c35..4efbde0984b2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -140,6 +140,8 @@ power9_restore_additional_sprs: ld r4, STOP_MMCR2(r13) mtspr SPRN_MMCR1, r3 mtspr SPRN_MMCR2, r4 + ld r4, PACA_SPRG_VDSO(r13) + mtspr SPRN_SPRG3, r4 blr /* -- GitLab From 3fcc143edcdd2ce497c9d2a0327c9b7ca1bbf403 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 21 Jun 2018 16:19:41 +0300 Subject: [PATCH 0308/1291] xhci: Fix perceived dead host due to runtime suspend race with event handler commit 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 upstream. Don't rely on event interrupt (EINT) bit alone to detect pending port change in resume. If no change event is detected the host may be suspended again, oterwise roothubs are resumed. There is a lag in xHC setting EINT. If we don't notice the pending change in resume, and the controller is runtime suspeded again, it causes the event handler to assume host is dead as it will fail to read xHC registers once PCI puts the controller to D3 state. [ 268.520969] xhci_hcd: xhci_resume: starting port polling. [ 268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling. [ 268.521030] xhci_hcd: xhci_suspend: stopping port polling. [ 268.521040] xhci_hcd: // Setting command ring address to 0x349bd001 [ 268.521139] xhci_hcd: Port Status Change Event for port 3 [ 268.521149] xhci_hcd: resume root hub [ 268.521163] xhci_hcd: port resume event for port 3 [ 268.521168] xhci_hcd: xHC is not running. [ 268.521174] xhci_hcd: handle_port_status: starting port polling. [ 268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead The EINT lag is described in a additional note in xhci specs 4.19.2: "Due to internal xHC scheduling and system delays, there will be a lag between a change bit being set and the Port Status Change Event that it generated being written to the Event Ring. If SW reads the PORTSC and sees a change bit set, there is no guarantee that the corresponding Port Status Change Event has already been written into the Event Ring." Cc: Signed-off-by: Mathias Nyman Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++--- drivers/usb/host/xhci.h | 4 ++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index e5bccc6d49cf..fe84b36627ec 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -856,6 +856,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_unlock_irqrestore(&xhci->lock, flags); } +static bool xhci_pending_portevent(struct xhci_hcd *xhci) +{ + __le32 __iomem **port_array; + int port_index; + u32 status; + u32 portsc; + + status = readl(&xhci->op_regs->status); + if (status & STS_EINT) + return true; + /* + * Checking STS_EINT is not enough as there is a lag between a change + * bit being set and the Port Status Change Event that it generated + * being written to the Event Ring. See note in xhci 1.1 section 4.19.2. + */ + + port_index = xhci->num_usb2_ports; + port_array = xhci->usb2_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + port_index = xhci->num_usb3_ports; + port_array = xhci->usb3_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + return false; +} + /* * Stop HC (not bus-specific) * @@ -955,7 +990,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend); */ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) { - u32 command, temp = 0, status; + u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct usb_hcd *secondary_hcd; int retval = 0; @@ -1077,8 +1112,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { /* Resume root hubs only when have pending events. */ - status = readl(&xhci->op_regs->status); - if (status & STS_EINT) { + if (xhci_pending_portevent(xhci)) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 2a72060dda1b..11232e62b898 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -392,6 +392,10 @@ struct xhci_op_regs { #define PORT_PLC (1 << 22) /* port configure error change - port failed to configure its link partner */ #define PORT_CEC (1 << 23) +#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ + PORT_RC | PORT_PLC | PORT_CEC) + + /* Cold Attach Status - xHC can set this bit to report device attached during * Sx state. Warm port reset should be perfomed to clear this bit and move port * to connected state. -- GitLab From f952480a8fc14ea24a4de9582e425924f98c92d3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Jul 2018 11:25:11 +0200 Subject: [PATCH 0309/1291] Linux 4.14.58 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a44d6b2adb76..ffc9b4e3867e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 57 +SUBLEVEL = 58 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 4168a84223646130203c7b566c27f9a74abef7ea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Jul 2018 12:19:48 +0200 Subject: [PATCH 0310/1291] Revert "cifs: Fix slab-out-of-bounds in send_set_info() on SMB2 ACE setting" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 748144f35514aef14c4fdef5bcaa0db99cb9367a which is commit f46ecbd97f508e68a7806291a139499794874f3d upstream. Philip reports: seems adding "cifs: Fix slab-out-of-bounds in send_set_info() on SMB2 ACE setting" (commit 748144f) [1] created a regression within linux v4.14 kernel series. Writing to a mounted cifs either freezes on writing or crashes the PC. A more detailed explanation you may find in our forums [2]. Reverting the patch, seems to "fix" it. Thoughts? [2] https://forum.manjaro.org/t/53250 Reported-by: Philip Müller Cc: Jianhong Yin Cc: Stefano Brivio Cc: Aurelien Aptel Cc: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0480cd9a9e81..71b81980787f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -338,10 +338,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; /* BB eventually switch this to SMB2 specific small buf size */ - if (smb2_command == SMB2_SET_INFO) - *request_buf = cifs_buf_get(); - else - *request_buf = cifs_small_buf_get(); + *request_buf = cifs_small_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -3171,7 +3168,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, } rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov); - cifs_buf_release(req); + cifs_small_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; if (rc != 0) -- GitLab From 4c686d73bc3e090c8d8d55a07a1010955dcce9df Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 20 Jul 2018 13:58:21 +0200 Subject: [PATCH 0311/1291] MIPS: ath79: fix register address in ath79_ddr_wb_flush() commit bc88ad2efd11f29e00a4fd60fcd1887abfe76833 upstream. ath79_ddr_wb_flush_base has the type void __iomem *, so register offsets need to be a multiple of 4 in order to access the intended register. Signed-off-by: Felix Fietkau Signed-off-by: John Crispin Signed-off-by: Paul Burton Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface") Patchwork: https://patchwork.linux-mips.org/patch/19912/ Cc: Alban Bedel Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 10a405d593df..c782b10ddf50 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); void ath79_ddr_wb_flush(u32 reg) { - void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; + void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4); /* Flush the DDR write buffer. */ __raw_writel(0x1, flush_reg); -- GitLab From de019e7857fb743bb50db9b82dd447d4894b5610 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 12 Jul 2018 09:33:04 -0700 Subject: [PATCH 0312/1291] MIPS: Fix off-by-one in pci_resource_to_user() commit 38c0a74fe06da3be133cae3fb7bde6a9438e698b upstream. The MIPS implementation of pci_resource_to_user() introduced in v3.12 by commit 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") incorrectly sets *end to the address of the byte after the resource, rather than the last byte of the resource. This results in userland seeing resources as a byte larger than they actually are, for example a 32 byte BAR will be reported by a tool such as lspci as being 33 bytes in size: Region 2: I/O ports at 1000 [disabled] [size=33] Correct this by subtracting one from the calculated end address, reporting the correct address to userland. Signed-off-by: Paul Burton Reported-by: Rui Wang Fixes: 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") Cc: James Hogan Cc: Ralf Baechle Cc: Wolfgang Grandegger Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v3.12+ Patchwork: https://patchwork.linux-mips.org/patch/19829/ Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 9632436d74d7..c2e94cf5ecda 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -54,5 +54,5 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, phys_addr_t size = resource_size(rsrc); *start = fixup_bigphys_addr(rsrc->start, size); - *end = rsrc->start + size; + *end = rsrc->start + size - 1; } -- GitLab From 14500f14e0b6dd3fff66eec70b6bb1cf4d108dfa Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 8 May 2018 19:56:22 -0400 Subject: [PATCH 0313/1291] xen/PVH: Set up GS segment for stack canary commit 98014068328c5574de9a4a30b604111fd9d8f901 upstream. We are making calls to C code (e.g. xen_prepare_pvh()) which may use stack canary (stored in GS segment). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Cc: Jason Andryuk Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-pvh.S | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index e1a5fbeae08d..5d7554c025fd 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -54,6 +54,9 @@ * charge of setting up it's own stack, GDT and IDT. */ +#define PVH_GDT_ENTRY_CANARY 4 +#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8) + ENTRY(pvh_start_xen) cld @@ -98,6 +101,12 @@ ENTRY(pvh_start_xen) /* 64-bit entry point. */ .code64 1: + /* Set base address in stack canary descriptor. */ + mov $MSR_GS_BASE,%ecx + mov $_pa(canary), %eax + xor %edx, %edx + wrmsr + call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ @@ -107,6 +116,17 @@ ENTRY(pvh_start_xen) #else /* CONFIG_X86_64 */ + /* Set base address in stack canary descriptor. */ + movl $_pa(gdt_start),%eax + movl $_pa(canary),%ecx + movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax) + shrl $16, %ecx + movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax) + movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax) + + mov $PVH_CANARY_SEL,%eax + mov %eax,%gs + call mk_early_pgtbl_32 mov $_pa(initial_page_table), %eax @@ -150,9 +170,13 @@ gdt_start: .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ #endif .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ + .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */ gdt_end: - .balign 4 + .balign 16 +canary: + .fill 48, 1, 0 + early_stack: .fill 256, 1, 0 early_stack_end: -- GitLab From 58113603a4ea409a2b77daa1e1caa663c648c748 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 17 Jul 2018 17:19:13 +1000 Subject: [PATCH 0314/1291] KVM: PPC: Check if IOMMU page is contained in the pinned physical page commit 76fa4975f3ed12d15762bc979ca44078598ed8ee upstream. A VM which has: - a DMA capable device passed through to it (eg. network card); - running a malicious kernel that ignores H_PUT_TCE failure; - capability of using IOMMU pages bigger that physical pages can create an IOMMU mapping that exposes (for example) 16MB of the host physical memory to the device when only 64K was allocated to the VM. The remaining 16MB - 64K will be some other content of host memory, possibly including pages of the VM, but also pages of host kernel memory, host programs or other VMs. The attacking VM does not control the location of the page it can map, and is only allowed to map as many pages as it has pages of RAM. We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that an IOMMU page is contained in the physical page so the PCI hardware won't get access to unassigned host memory; however this check is missing in the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and did not hit this yet as the very first time when the mapping happens we do not have tbl::it_userspace allocated yet and fall back to the userspace which in turn calls VFIO IOMMU driver, this fails and the guest does not retry, This stores the smallest preregistered page size in the preregistered region descriptor and changes the mm_iommu_xxx API to check this against the IOMMU page size. This calculates maximum page size as a minimum of the natural region alignment and compound page size. For the page shift this uses the shift returned by find_linux_pte() which indicates how the page is mapped to the current userspace - if the page is huge and this is not a zero, then it is a leaf pte and the page is mapped within the range. Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Alexey Kardashevskiy Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu_context.h | 4 +-- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 6 +++-- arch/powerpc/mm/mmu_context_iommu.c | 37 ++++++++++++++++++++++++-- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 44fdf4786638..6f67ff5a5267 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 4dffa611376d..e14cec6bc339 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ return H_TOO_HARD; - if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) return H_HARDWARE; if (mm_iommu_mapped_inc(mem)) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index c32e9bfe75b1..648cf6c01348 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -262,7 +262,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, if (!mem) return H_TOO_HARD; - if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, + &hpa))) return H_HARDWARE; pua = (void *) vmalloc_to_phys(pua); @@ -431,7 +432,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); if (mem) - prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; + prereg = mm_iommu_ua_to_hpa_rm(mem, ua, + IOMMU_PAGE_SHIFT_4K, &tces) == 0; } if (!prereg) { diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0a2d8e806ed..816055927ee4 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -19,6 +19,7 @@ #include #include #include +#include static DEFINE_MUTEX(mem_list_mutex); @@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t { struct rcu_head rcu; unsigned long used; atomic64_t mapped; + unsigned int pageshift; u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ @@ -126,6 +128,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; + unsigned int pageshift; + unsigned long flags; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -160,6 +164,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + /* + * For a starting point for a maximum page size calculation + * we use @ua and @entries natural alignment to allow IOMMU pages + * smaller than huge pages but still bigger than PAGE_SIZE. + */ + mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); mem->hpas = vzalloc(entries * sizeof(mem->hpas[0])); if (!mem->hpas) { kfree(mem); @@ -200,6 +210,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } } populate: + pageshift = PAGE_SHIFT; + if (PageCompound(page)) { + pte_t *pte; + struct page *head = compound_head(page); + unsigned int compshift = compound_order(head); + + local_irq_save(flags); /* disables as well */ + pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); + local_irq_restore(flags); + + /* Double check it is still the same pinned page */ + if (pte && pte_page(*pte) == head && + pageshift == compshift) + pageshift = max_t(unsigned int, pageshift, + PAGE_SHIFT); + } + mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } @@ -350,7 +377,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, EXPORT_SYMBOL_GPL(mm_iommu_find); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; u64 *va = &mem->hpas[entry]; @@ -358,6 +385,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + *hpa = *va | (ua & ~PAGE_MASK); return 0; @@ -365,7 +395,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; void *va = &mem->hpas[entry]; @@ -374,6 +404,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + pa = (void *) vmalloc_to_phys(va); if (!pa) return -EFAULT; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index b751dd60e41a..b4c68f3b82be 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, if (!mem) return -EINVAL; - ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa); if (ret) return -EINVAL; -- GitLab From d0bd2c70ffcbf83ad02d551ad6f1c1a1a6664ef1 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 12 Jul 2018 13:02:53 -0400 Subject: [PATCH 0315/1291] drm/nouveau/drm/nouveau: Fix runtime PM leak in nv50_disp_atomic_commit() commit e5d54f1935722f83df7619f3978f774c2b802cd8 upstream. A CRTC being enabled doesn't mean it's on! It doesn't even necessarily mean it's being used. This fixes runtime PM leaks on the P50 I've got next to me. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a29474528e85..8ce39f460866 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -4150,7 +4150,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, nv50_disp_atomic_commit_tail(state); drm_for_each_crtc(crtc, dev) { - if (crtc->state->enable) { + if (crtc->state->active) { if (!drm->have_disp_power_ref) { drm->have_disp_power_ref = true; return 0; -- GitLab From 7e454c18b76f07ff14666853aba38a4e6890921f Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 3 Jul 2018 16:31:41 -0400 Subject: [PATCH 0316/1291] drm/nouveau: Set DRIVER_ATOMIC cap earlier to fix debugfs commit eb493fbc150f4a28151ae1ee84f24395989f3600 upstream. Currently nouveau doesn't actually expose the state debugfs file that's usually provided for any modesetting driver that supports atomic, even if nouveau is loaded with atomic=1. This is due to the fact that the standard debugfs files that DRM creates for atomic drivers is called when drm_get_pci_dev() is called from nouveau_drm.c. This happens well before we've initialized the display core, which is currently responsible for setting the DRIVER_ATOMIC cap. So, move the atomic option into nouveau_drm.c and just add the DRIVER_ATOMIC cap whenever it's enabled on the kernel commandline. This shouldn't cause any actual issues, as the atomic ioctl will still fail as expected even if the display core doesn't disable it until later in the init sequence. This also provides the added benefit of being able to use the state debugfs file to check the current display state even if clients aren't allowed to modify it through anything other than the legacy ioctls. Additionally, disable the DRIVER_ATOMIC cap in nv04's display core, as this was already disabled there previously. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 3 +++ drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +++++++ drivers/gpu/drm/nouveau/nv50_display.c | 6 ------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 5b9d549aa791..e7926da59214 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -55,6 +55,9 @@ nv04_display_create(struct drm_device *dev) nouveau_display(dev)->init = nv04_display_init; nouveau_display(dev)->fini = nv04_display_fini; + /* Pre-nv50 doesn't support atomic, so don't expose the ioctls */ + dev->driver->driver_features &= ~DRIVER_ATOMIC; + nouveau_hw_save_vga_fonts(dev, 1); nv04_crtc_create(dev, 0); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 595630d1fb9e..362a34cb435d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -79,6 +79,10 @@ MODULE_PARM_DESC(modeset, "enable driver (default: auto, " int nouveau_modeset = -1; module_param_named(modeset, nouveau_modeset, int, 0400); +MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)"); +static int nouveau_atomic = 0; +module_param_named(atomic, nouveau_atomic, int, 0400); + MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)"); static int nouveau_runtime_pm = -1; module_param_named(runpm, nouveau_runtime_pm, int, 0400); @@ -383,6 +387,9 @@ static int nouveau_drm_probe(struct pci_dev *pdev, pci_set_master(pdev); + if (nouveau_atomic) + driver_pci.driver_features |= DRIVER_ATOMIC; + ret = drm_get_pci_dev(pdev, pent, &driver_pci); if (ret) { nvkm_device_del(&device); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 8ce39f460866..926ec51ba5be 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -4398,10 +4398,6 @@ nv50_display_destroy(struct drm_device *dev) kfree(disp); } -MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)"); -static int nouveau_atomic = 0; -module_param_named(atomic, nouveau_atomic, int, 0400); - int nv50_display_create(struct drm_device *dev) { @@ -4426,8 +4422,6 @@ nv50_display_create(struct drm_device *dev) disp->disp = &nouveau_display(dev)->disp; dev->mode_config.funcs = &nv50_disp_func; dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP; - if (nouveau_atomic) - dev->driver->driver_features |= DRIVER_ATOMIC; /* small shared memory area we use for notifiers and semaphores */ ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM, -- GitLab From f1fb27fc256ce822ce22838c0edd862c76b04dc6 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 18 Jul 2018 14:49:36 -0400 Subject: [PATCH 0317/1291] bonding: set default miimon value for non-arp modes if not set [ Upstream commit c1f897ce186a529a494441642125479d38727a3d ] For some time now, if you load the bonding driver and configure bond parameters via sysfs using minimal config options, such as specifying nothing but the mode, relying on defaults for everything else, modes that cannot use arp monitoring (802.3ad, balance-tlb, balance-alb) all wind up with both arp_interval=0 (as it should be) and miimon=0, which means the miimon monitor thread never actually runs. This is particularly problematic for 802.3ad. For example, from an LNST recipe I've set up: $ modprobe bonding max_bonds=0" $ echo "+t_bond0" > /sys/class/net/bonding_masters" $ ip link set t_bond0 down" $ echo "802.3ad" > /sys/class/net/t_bond0/bonding/mode" $ ip link set ens1f1 down" $ echo "+ens1f1" > /sys/class/net/t_bond0/bonding/slaves" $ ip link set ens1f0 down" $ echo "+ens1f0" > /sys/class/net/t_bond0/bonding/slaves" $ ethtool -i t_bond0" $ ip link set ens1f1 up" $ ip link set ens1f0 up" $ ip link set t_bond0 up" $ ip addr add 192.168.9.1/24 dev t_bond0" $ ip addr add 2002::1/64 dev t_bond0" This bond comes up okay, but things look slightly suspect in /proc/net/bonding/t_bond0 output: $ grep -i mii /proc/net/bonding/t_bond0 MII Status: up MII Polling Interval (ms): 0 MII Status: up MII Status: up Now, pull a cable on one of the ports in the bond, then reconnect it, and you'll see: Slave Interface: ens1f0 MII Status: down Speed: 1000 Mbps Duplex: full I believe this became a major issue as of commit 4d2c0cda0744, which for 802.3ad bonds, sets slave->link = BOND_LINK_DOWN, with a comment about relying on link monitoring via miimon to set it correctly, but since the miimon work queue never runs, the link just stays marked down. If we simply tweak bond_option_mode_set() slightly, we can check for the non-arp modes having no miimon value set, and insert BOND_DEFAULT_MIIMON, which gets things back in full working order. This problem exists as far back as 4.14, and might be worth fixing in all stable trees since, though the work-around is to simply specify an miimon value yourself. Reported-by: Bob Ball Signed-off-by: Jarod Wilson Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_options.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 61084ba69a99..3d154eb63dcf 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -743,15 +743,20 @@ const struct bond_option *bond_opt_get(unsigned int option) static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { - if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) { - netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", - newval->string); - /* disable arp monitoring */ - bond->params.arp_interval = 0; - /* set miimon to default value */ - bond->params.miimon = BOND_DEFAULT_MIIMON; - netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n", - bond->params.miimon); + if (!bond_mode_uses_arp(newval->value)) { + if (bond->params.arp_interval) { + netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", + newval->string); + /* disable arp monitoring */ + bond->params.arp_interval = 0; + } + + if (!bond->params.miimon) { + /* set miimon to default value */ + bond->params.miimon = BOND_DEFAULT_MIIMON; + netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n", + bond->params.miimon); + } } if (newval->value == BOND_MODE_ALB) -- GitLab From c2ce657fd68cd617e7b8e014a141a2e15e799584 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Jul 2018 16:50:48 +0200 Subject: [PATCH 0318/1291] ip: hash fragments consistently [ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ] The skb hash for locally generated ip[v6] fragments belonging to the same datagram can vary in several circumstances: * for connected UDP[v6] sockets, the first fragment get its hash via set_owner_w()/skb_set_hash_from_sk() * for unconnected IPv6 UDPv6 sockets, the first fragment can get its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if auto_flowlabel is enabled For the following frags the hash is usually computed via skb_get_hash(). The above can cause OoO for unconnected IPv6 UDPv6 socket: in that scenario the egress tx queue can be selected on a per packet basis via the skb hash. It may also fool flow-oriented schedulers to place fragments belonging to the same datagram in different flows. Fix the issue by copying the skb hash from the head frag into the others at fragmentation time. Before this commit: perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8" netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n & perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1 perf script probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0 After this commit: probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 2 ++ net/ipv6/ip6_output.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 63d5d66e040a..e2dd325bed9b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -523,6 +523,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32fcce711855..1da021527fcd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -595,6 +595,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif -- GitLab From df20f746d68b6982923f0c318cc6a9347c96454f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 23 Jul 2018 19:36:48 -0400 Subject: [PATCH 0319/1291] ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull [ Upstream commit 2efd4fca703a6707cad16ab486eaab8fc7f0fd49 ] Syzbot reported a read beyond the end of the skb head when returning IPV6_ORIGDSTADDR: BUG: KMSAN: kernel-infoleak in put_cmsg+0x5ef/0x860 net/core/scm.c:242 CPU: 0 PID: 4501 Comm: syz-executor128 Not tainted 4.17.0+ #9 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1125 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1219 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1261 copy_to_user include/linux/uaccess.h:184 [inline] put_cmsg+0x5ef/0x860 net/core/scm.c:242 ip6_datagram_recv_specific_ctl+0x1cf3/0x1eb0 net/ipv6/datagram.c:719 ip6_datagram_recv_ctl+0x41c/0x450 net/ipv6/datagram.c:733 rawv6_recvmsg+0x10fb/0x1460 net/ipv6/raw.c:521 [..] This logic and its ipv4 counterpart read the destination port from the packet at skb_transport_offset(skb) + 4. With MSG_MORE and a local SOCK_RAW sender, syzbot was able to cook a packet that stores headers exactly up to skb_transport_offset(skb) in the head and the remainder in a frag. Call pskb_may_pull before accessing the pointer to ensure that it lies in skb head. Link: http://lkml.kernel.org/r/CAF=yD-LEJwZj5a1-bAAj2Oy_hKmGygV6rsJ_WOrAYnv-fnayiQ@mail.gmail.com Reported-by: syzbot+9adb4b567003cac781f0@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 7 +++++-- net/ipv6/datagram.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d07ba4d5917b..048d5f6dd320 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -148,15 +148,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; const struct iphdr *iph = ip_hdr(skb); - __be16 *ports = (__be16 *)skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 > (int)skb->len) + end = skb_transport_offset(skb) + 4; + if (end > 0 && !pskb_may_pull(skb, end)) return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; sin.sin_addr.s_addr = iph->daddr; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 453dc3726199..461825e0680f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -708,13 +708,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports = (__be16 *) skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 <= (int)skb->len) { + end = skb_transport_offset(skb) + 4; + if (end <= 0 || pskb_may_pull(skb, end)) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; -- GitLab From 6e92f04a4fef1dd178fe1e285e7304bc3bb42ba2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 24 Jul 2018 14:27:55 +0300 Subject: [PATCH 0320/1291] net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper [ Upstream commit 958c696f5a7274d9447a458ad7aa70719b29a50a ] Function mlx4_RST2INIT_QP_wrapper saved the qp number passed in the qp context, rather than the one passed in the input modifier. However, the qp number in the qp context is not defined as a required parameter by the FW. Therefore, drivers may choose to not specify the qp number in the qp context for the reset-to-init transition. Thus, we must save the qp number passed in the command input modifier -- which is always present. (This saved qp number is used as the input modifier for command 2RST_QP when a slave's qp's are destroyed). Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a069fcc823c3..b26da0952a4d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2957,7 +2957,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, u32 srqn = qp_get_srqn(qpc) & 0xffffff; int use_srq = (qp_get_srqn(qpc) >> 24) & 1; struct res_srq *srq; - int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; + int local_qpn = vhcr->in_modifier & 0xffffff; err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) -- GitLab From f208fbad98fde951c9783194e83ecfe004327b33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2018 16:04:38 -0700 Subject: [PATCH 0321/1291] net: skb_segment() should not return NULL [ Upstream commit ff907a11a0d68a749ce1a321f4505c03bf72190c ] syzbot caught a NULL deref [1], caused by skb_segment() skb_segment() has many "goto err;" that assume the @err variable contains -ENOMEM. A successful call to __skb_linearize() should not clear @err, otherwise a subsequent memory allocation error could return NULL. While we are at it, we might use -EINVAL instead of -ENOMEM when MAX_SKB_FRAGS limit is reached. [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN CPU: 0 PID: 13285 Comm: syz-executor3 Not tainted 4.18.0-rc4+ #146 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tcp_gso_segment+0x3dc/0x1780 net/ipv4/tcp_offload.c:106 Code: f0 ff ff 0f 87 1c fd ff ff e8 00 88 0b fb 48 8b 75 d0 48 b9 00 00 00 00 00 fc ff df 48 8d be 90 00 00 00 48 89 f8 48 c1 e8 03 <0f> b6 14 08 48 8d 86 94 00 00 00 48 89 c6 83 e0 07 48 c1 ee 03 0f RSP: 0018:ffff88019b7fd060 EFLAGS: 00010206 RAX: 0000000000000012 RBX: 0000000000000020 RCX: dffffc0000000000 RDX: 0000000000040000 RSI: 0000000000000000 RDI: 0000000000000090 RBP: ffff88019b7fd0f0 R08: ffff88019510e0c0 R09: ffffed003b5c46d6 R10: ffffed003b5c46d6 R11: ffff8801dae236b3 R12: 0000000000000001 R13: ffff8801d6c581f4 R14: 0000000000000000 R15: ffff8801d6c58128 FS: 00007fcae64d6700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004e8664 CR3: 00000001b669b000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp4_gso_segment+0x1c3/0x440 net/ipv4/tcp_offload.c:54 inet_gso_segment+0x64e/0x12d0 net/ipv4/af_inet.c:1342 inet_gso_segment+0x64e/0x12d0 net/ipv4/af_inet.c:1342 skb_mac_gso_segment+0x3b5/0x740 net/core/dev.c:2792 __skb_gso_segment+0x3c3/0x880 net/core/dev.c:2865 skb_gso_segment include/linux/netdevice.h:4099 [inline] validate_xmit_skb+0x640/0xf30 net/core/dev.c:3104 __dev_queue_xmit+0xc14/0x3910 net/core/dev.c:3561 dev_queue_xmit+0x17/0x20 net/core/dev.c:3602 neigh_hh_output include/net/neighbour.h:473 [inline] neigh_output include/net/neighbour.h:481 [inline] ip_finish_output2+0x1063/0x1860 net/ipv4/ip_output.c:229 ip_finish_output+0x841/0xfa0 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:276 [inline] ip_output+0x223/0x880 net/ipv4/ip_output.c:405 dst_output include/net/dst.h:444 [inline] ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124 iptunnel_xmit+0x567/0x850 net/ipv4/ip_tunnel_core.c:91 ip_tunnel_xmit+0x1598/0x3af1 net/ipv4/ip_tunnel.c:778 ipip_tunnel_xmit+0x264/0x2c0 net/ipv4/ipip.c:308 __netdev_start_xmit include/linux/netdevice.h:4148 [inline] netdev_start_xmit include/linux/netdevice.h:4157 [inline] xmit_one net/core/dev.c:3034 [inline] dev_hard_start_xmit+0x26c/0xc30 net/core/dev.c:3050 __dev_queue_xmit+0x29ef/0x3910 net/core/dev.c:3569 dev_queue_xmit+0x17/0x20 net/core/dev.c:3602 neigh_direct_output+0x15/0x20 net/core/neighbour.c:1403 neigh_output include/net/neighbour.h:483 [inline] ip_finish_output2+0xa67/0x1860 net/ipv4/ip_output.c:229 ip_finish_output+0x841/0xfa0 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:276 [inline] ip_output+0x223/0x880 net/ipv4/ip_output.c:405 dst_output include/net/dst.h:444 [inline] ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124 ip_queue_xmit+0x9df/0x1f80 net/ipv4/ip_output.c:504 tcp_transmit_skb+0x1bf9/0x3f10 net/ipv4/tcp_output.c:1168 tcp_write_xmit+0x1641/0x5c20 net/ipv4/tcp_output.c:2363 __tcp_push_pending_frames+0xb2/0x290 net/ipv4/tcp_output.c:2536 tcp_push+0x638/0x8c0 net/ipv4/tcp.c:735 tcp_sendmsg_locked+0x2ec5/0x3f00 net/ipv4/tcp.c:1410 tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1447 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:641 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:651 __sys_sendto+0x3d7/0x670 net/socket.c:1797 __do_sys_sendto net/socket.c:1809 [inline] __se_sys_sendto net/socket.c:1805 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1805 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x455ab9 Code: 1d ba fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b9 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fcae64d5c68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fcae64d66d4 RCX: 0000000000455ab9 RDX: 0000000000000001 RSI: 0000000020000200 RDI: 0000000000000013 RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000014 R13: 00000000004c1145 R14: 00000000004d1818 R15: 0000000000000006 Modules linked in: Dumping ftrace buffer: (ftrace buffer empty) Fixes: ddff00d42043 ("net: Move skb_has_shared_frag check out of GRE code and into segmentation") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Reported-by: syzbot Acked-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 23041b5c0b27..2e5eeba97de9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3675,6 +3675,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); + err = -EINVAL; goto err; } @@ -3713,11 +3714,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, perform_csum_check: if (!csum) { - if (skb_has_shared_frag(nskb)) { - err = __skb_linearize(nskb); - if (err) - goto err; - } + if (skb_has_shared_frag(nskb) && + __skb_linearize(nskb)) + goto err; + if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = -- GitLab From 291d99ac4dc273b64b7d98b98f494e18da716744 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 25 Jun 2018 19:12:02 +0300 Subject: [PATCH 0322/1291] net/mlx5: Adjust clock overflow work period [ Upstream commit 33180bee86a8940a84950edca46315cd9dd6deb5 ] When driver converts HW timestamp to wall clock time it subtracts the last saved cycle counter from the HW timestamp and converts the difference to nanoseconds. The conversion is done by multiplying the cycles difference with the clock multiplier value as a first step and therefore the cycles difference should be small enough so that the multiplication product doesn't exceed 64bit. The overflow handling routine is in charge of updating the last saved cycle counter in driver and it is called periodically using kernel delayed workqueue. The delay period for this work is calculated using the max HW cycle counter value (a 41 bit mask) as a base which doesn't take the 64bit limit into account so the delay period may be incorrect and too long to prevent a large difference between the HW counter and the last saved counter in SW. This change adjusts the work period for the HW clock overflow work by taking the minimum between the previous value and the quotient of max u64 value and the clock multiplier value. Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support") Signed-off-by: Ariel Levkovich Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 84dd63e74041..27040009d87a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -545,6 +545,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv, void mlx5e_timestamp_init(struct mlx5e_priv *priv) { struct mlx5e_tstamp *tstamp = &priv->tstamp; + u64 overflow_cycles; u64 ns; u64 frac = 0; u32 dev_freq; @@ -569,10 +570,17 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. */ - ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask, + overflow_cycles = div64_u64(~0ULL >> 1, tstamp->cycles.mult); + overflow_cycles = min(overflow_cycles, tstamp->cycles.mask >> 1); + + ns = cyclecounter_cyc2ns(&tstamp->cycles, overflow_cycles, frac, &frac); - do_div(ns, NSEC_PER_SEC / 2 / HZ); + do_div(ns, NSEC_PER_SEC / HZ); tstamp->overflow_period = ns; INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); -- GitLab From c83cd44202b503269388347a959017ebb6573963 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 8 Jul 2018 14:52:12 +0300 Subject: [PATCH 0323/1291] net/mlx5e: Don't allow aRFS for encapsulated packets [ Upstream commit d2e1c57bcf9a07cbb67f30ecf238f298799bce1c ] Driver is yet to support aRFS for encapsulated packets, return early error in such case. Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 12d3ced61114..0f0c1d7b80c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -711,6 +711,9 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, skb->protocol != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; + if (skb->encapsulation) + return -EPROTONOSUPPORT; + arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); if (!arfs_t) return -EPROTONOSUPPORT; -- GitLab From 047af2d8ced33f16815e3650bfd5e6c732229634 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 8 Jul 2018 13:08:55 +0300 Subject: [PATCH 0324/1291] net/mlx5e: Fix quota counting in aRFS expire flow [ Upstream commit 2630bae8018823c3b88788b69fb9f16ea3b4a11e ] Quota should follow the amount of rules which do expire, and not the number of rules that were examined, fixed that. Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support") Signed-off-by: Eran Ben Elisha Reviewed-by: Maor Gottlieb Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 0f0c1d7b80c0..e87923e046c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -381,14 +381,14 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) HLIST_HEAD(del_list); spin_lock_bh(&priv->fs.arfs.arfs_lock); mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { - if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) - break; if (!work_pending(&arfs_rule->arfs_work) && rps_may_expire_flow(priv->netdev, arfs_rule->rxq, arfs_rule->flow_id, arfs_rule->filter_id)) { hlist_del_init(&arfs_rule->hlist); hlist_add_head(&arfs_rule->hlist, &del_list); + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; } } spin_unlock_bh(&priv->fs.arfs.arfs_lock); -- GitLab From 6d5b7d68f45b1208493385a8490b65ff38c4fafe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Jul 2018 12:41:18 -0700 Subject: [PATCH 0325/1291] net/ipv6: Fix linklocal to global address with VRF [ Upstream commit 24b711edfc34bc45777a3f068812b7d1ed004a5d ] Example setup: host: ip -6 addr add dev eth1 2001:db8:104::4 where eth1 is enslaved to a VRF switch: ip -6 ro add 2001:db8:104::4/128 dev br1 where br1 only has an LLA ping6 2001:db8:104::4 ssh 2001:db8:104::4 (NOTE: UDP works fine if the PKTINFO has the address set to the global address and ifindex is set to the index of eth1 with a destination an LLA). For ICMP, icmp6_iif needs to be updated to check if skb->dev is an L3 master. If it is then return the ifindex from rt6i_idev similar to what is done for loopback. For TCP, restore the original tcp_v6_iif definition which is needed in most places and add a new tcp_v6_iif_l3_slave that considers the l3_slave variability. This latter check is only needed for socket lookups. Fixes: 9ff74384600a ("net: vrf: Handle ipv6 multicast and link-local addresses") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 5 +++++ net/ipv6/icmp.c | 5 +++-- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index fb653736f335..3b49f3aafed7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -857,6 +857,11 @@ struct tcp_skb_cb { * as TCP moves IP6CB into a different location in skb->cb[] */ static inline int tcp_v6_iif(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->header.h6.iif; +} + +static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb) { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5acb54405b10..c5f2b17b7ee1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -405,9 +405,10 @@ static int icmp6_iif(const struct sk_buff *skb) /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so - * get the real device index. + * get the real device index. Same is needed for replies to a link + * local address on a device enslaved to an L3 master device */ - if (unlikely(iif == LOOPBACK_IFINDEX)) { + if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); if (rt6) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 35e8aef9ceed..ba8586aadffa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -918,7 +918,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), tcp_v6_iif(skb), + ntohs(th->source), + tcp_v6_iif_l3_slave(skb), tcp_v6_sdif(skb)); if (!sk1) goto out; @@ -1573,7 +1574,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), tcp_v6_iif(skb), + ntohs(th->dest), + tcp_v6_iif_l3_slave(skb), sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); -- GitLab From 46f9e1d0bd4e52cd4fefcc6a7a28066f877857af Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 20 Jul 2018 14:04:27 +0800 Subject: [PATCH 0326/1291] multicast: do not restore deleted record source filter mode to new one There are two scenarios that we will restore deleted records. The first is when device down and up(or unmap/remap). In this scenario the new filter mode is same with previous one. Because we get it from in_dev->mc_list and we do not touch it during device down and up. The other scenario is when a new socket join a group which was just delete and not finish sending status reports. In this scenario, we should use the current filter mode instead of restore old one. Here are 4 cases in total. old_socket new_socket before_fix after_fix IN(A) IN(A) ALLOW(A) ALLOW(A) IN(A) EX( ) TO_IN( ) TO_EX( ) EX( ) IN(A) TO_EX( ) ALLOW(A) EX( ) EX( ) TO_EX( ) TO_EX( ) Fixes: 24803f38a5c0b (igmp: do not remove igmp souce list info when set link down) Fixes: 1666d49e1d416 (mld: do not remove mld souce list info when set link down) Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 3 +-- net/ipv6/mcast.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fbeb35ad804b..502aae3e3ab8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1201,8 +1201,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) if (pmc) { im->interface = pmc->interface; im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - im->sfmode = pmc->sfmode; - if (pmc->sfmode == MCAST_INCLUDE) { + if (im->sfmode == MCAST_INCLUDE) { im->tomb = pmc->tomb; im->sources = pmc->sources; for (psf = im->sources; psf; psf = psf->sf_next) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9a38a2c641fa..6fd913d63835 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -771,8 +771,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) if (pmc) { im->idev = pmc->idev; im->mca_crcount = idev->mc_qrv; - im->mca_sfmode = pmc->mca_sfmode; - if (pmc->mca_sfmode == MCAST_INCLUDE) { + if (im->mca_sfmode == MCAST_INCLUDE) { im->mca_tomb = pmc->mca_tomb; im->mca_sources = pmc->mca_sources; for (psf = im->mca_sources; psf; psf = psf->sf_next) -- GitLab From 50b464d33964282fb09be7d6d8a6063475f63814 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 19 Jul 2018 08:15:16 +0200 Subject: [PATCH 0327/1291] net: phy: consider PHY_IGNORE_INTERRUPT in phy_start_aneg_priv [ Upstream commit 215d08a85b9acf5e1fe9dbf50f1774cde333efef ] The situation described in the comment can occur also with PHY_IGNORE_INTERRUPT, therefore change the condition to include it. Fixes: f555f34fdc58 ("net: phy: fix auto-negotiation stall due to unavailable interrupt") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index dba6d17ad885..47d2ef2fb9b3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -511,7 +511,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) * negotiation may already be done and aneg interrupt may not be * generated. */ - if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { + if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) { err = phy_aneg_done(phydev); if (err > 0) { trigger = true; -- GitLab From 464e2326a7f5cd39d82f96750bb56a9d302edf8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Jul 2018 22:37:54 +0200 Subject: [PATCH 0328/1291] sock: fix sg page frag coalescing in sk_alloc_sg [ Upstream commit 144fe2bfd236dc814eae587aea7e2af03dbdd755 ] Current sg coalescing logic in sk_alloc_sg() (latter is used by tls and sockmap) is not quite correct in that we do fetch the previous sg entry, however the subsequent check whether the refilled page frag from the socket is still the same as from the last entry with prior offset and length matching the start of the current buffer is comparing always the first sg list entry instead of the prior one. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Daniel Borkmann Acked-by: Dave Watson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8ee4e667a414..fb79caf56d0e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -135,9 +135,10 @@ static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg, pfrag->offset += use; sge = sg + num_elem - 1; - if (num_elem > first_coalesce && sg_page(sg) == pfrag->page && - sg->offset + sg->length == orig_offset) { - sg->length += use; + + if (num_elem > first_coalesce && sg_page(sge) == pfrag->page && + sge->offset + sge->length == orig_offset) { + sge->length += use; } else { sge++; sg_unmark_end(sge); -- GitLab From 23557c5d34b96e1129c8567a26764faa587dd659 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 20 Jul 2018 13:21:01 -0700 Subject: [PATCH 0329/1291] rtnetlink: add rtnl_link_state check in rtnl_configure_link [ Upstream commit 5025f7f7d506fba9b39e7fe8ca10f6f34cb9bc2d ] rtnl_configure_link sets dev->rtnl_link_state to RTNL_LINK_INITIALIZED and unconditionally calls __dev_notify_flags to notify user-space of dev flags. current call sequence for rtnl_configure_link rtnetlink_newlink rtnl_link_ops->newlink rtnl_configure_link (unconditionally notifies userspace of default and new dev flags) If a newlink handler wants to call rtnl_configure_link early, we will end up with duplicate notifications to user-space. This patch fixes rtnl_configure_link to check rtnl_link_state and call __dev_notify_flags with gchanges = 0 if already RTNL_LINK_INITIALIZED. Later in the series, this patch will help the following sequence where a driver implementing newlink can call rtnl_configure_link to initialize the link early. makes the following call sequence work: rtnetlink_newlink rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes link and notifies user-space of default dev flags) rtnl_configure_link (updates dev flags if requested by user ifm and notifies user-space of new dev flags) Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4cfdad08aca0..efe396cc77b5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2402,9 +2402,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) return err; } - dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - - __dev_notify_flags(dev, old_flags, ~0U); + if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { + __dev_notify_flags(dev, old_flags, 0U); + } else { + dev->rtnl_link_state = RTNL_LINK_INITIALIZED; + __dev_notify_flags(dev, old_flags, ~0U); + } return 0; } EXPORT_SYMBOL(rtnl_configure_link); -- GitLab From 1c345a5292587b29a99d30074fbb1759bccc43b5 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 20 Jul 2018 13:21:02 -0700 Subject: [PATCH 0330/1291] vxlan: add new fdb alloc and create helpers [ Upstream commit 7431016b107c95cb5b2014aa1901fcb115f746bc ] - Add new vxlan_fdb_alloc helper - rename existing vxlan_fdb_create into vxlan_fdb_update: because it really creates or updates an existing fdb entry - move new fdb creation into a separate vxlan_fdb_create Main motivation for this change is to introduce the ability to decouple vxlan fdb creation and notify, used in a later patch. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 91 ++++++++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index bbdb46916dc3..d72c24d504d8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -636,8 +636,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } -/* Add new entry to forwarding table -- assumes lock held */ +static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, + const u8 *mac, __u16 state, + __be32 src_vni, __u8 ndm_flags) +{ + struct vxlan_fdb *f; + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return NULL; + f->state = state; + f->flags = ndm_flags; + f->updated = f->used = jiffies; + f->vni = src_vni; + INIT_LIST_HEAD(&f->remotes); + memcpy(f->eth_addr, mac, ETH_ALEN); + + return f; +} + static int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __be16 port, __be32 src_vni, + __be32 vni, __u32 ifindex, __u8 ndm_flags, + struct vxlan_fdb **fdb) +{ + struct vxlan_rdst *rd = NULL; + struct vxlan_fdb *f; + int rc; + + if (vxlan->cfg.addrmax && + vxlan->addrcnt >= vxlan->cfg.addrmax) + return -ENOSPC; + + netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); + f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags); + if (!f) + return -ENOMEM; + + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + if (rc < 0) { + kfree(f); + return rc; + } + + ++vxlan->addrcnt; + hlist_add_head_rcu(&f->hlist, + vxlan_fdb_head(vxlan, mac, src_vni)); + + *fdb = f; + + return 0; +} + +/* Add new entry to forwarding table -- assumes lock held */ +static int vxlan_fdb_update(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, __be16 port, __be32 src_vni, __be32 vni, @@ -687,37 +740,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, if (!(flags & NLM_F_CREATE)) return -ENOENT; - if (vxlan->cfg.addrmax && - vxlan->addrcnt >= vxlan->cfg.addrmax) - return -ENOSPC; - /* Disallow replace to add a multicast entry */ if ((flags & NLM_F_REPLACE) && (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) return -EOPNOTSUPP; netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); - f = kmalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return -ENOMEM; - - notify = 1; - f->state = state; - f->flags = ndm_flags; - f->updated = f->used = jiffies; - f->vni = src_vni; - INIT_LIST_HEAD(&f->remotes); - memcpy(f->eth_addr, mac, ETH_ALEN); - - rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); - if (rc < 0) { - kfree(f); + rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni, + vni, ifindex, ndm_flags, &f); + if (rc < 0) return rc; - } - - ++vxlan->addrcnt; - hlist_add_head_rcu(&f->hlist, - vxlan_fdb_head(vxlan, mac, src_vni)); + notify = 1; } if (notify) { @@ -863,7 +896,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EAFNOSUPPORT; spin_lock_bh(&vxlan->hash_lock); - err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, + err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags); spin_unlock_bh(&vxlan->hash_lock); @@ -1006,7 +1039,7 @@ static bool vxlan_snoop(struct net_device *dev, /* close off race between vxlan_flush and incoming packets */ if (netif_running(dev)) - vxlan_fdb_create(vxlan, src_mac, src_ip, + vxlan_fdb_update(vxlan, src_mac, src_ip, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, vxlan->cfg.dst_port, @@ -3167,7 +3200,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &vxlan->default_dst.remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_EXCL | NLM_F_CREATE, @@ -3441,7 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_CREATE | NLM_F_APPEND, -- GitLab From bb0335aacfddee9b7d06a1e2fdca0b62140c791c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 20 Jul 2018 13:21:03 -0700 Subject: [PATCH 0331/1291] vxlan: make netlink notify in vxlan_fdb_destroy optional [ Upstream commit f6e053858671bb156b6e44ad66418acc8c7f4e77 ] Add a new option do_notify to vxlan_fdb_destroy to make sending netlink notify optional. Used by a later patch. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d72c24d504d8..5ee0f6d9f7bc 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -774,13 +774,15 @@ static void vxlan_fdb_free(struct rcu_head *head) kfree(f); } -static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) +static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, + bool do_notify) { netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); --vxlan->addrcnt; - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); + if (do_notify) + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); hlist_del_rcu(&f->hlist); call_rcu(&f->rcu, vxlan_fdb_free); @@ -930,7 +932,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, goto out; } - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); out: return 0; @@ -2393,7 +2395,7 @@ static void vxlan_cleanup(unsigned long arg) "garbage collect %pM\n", f->eth_addr); f->state = NUD_STALE; - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); } else if (time_before(timeout, next_timer)) next_timer = timeout; } @@ -2444,7 +2446,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) spin_lock_bh(&vxlan->hash_lock); f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); if (f) - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); spin_unlock_bh(&vxlan->hash_lock); } @@ -2498,7 +2500,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) continue; /* the all_zeros_mac entry is deleted at vxlan_uninit */ if (!is_zero_ether_addr(f->eth_addr)) - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); } } spin_unlock_bh(&vxlan->hash_lock); -- GitLab From 68974d0b9c8662744afdad22dbcb7d93b29a7b94 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 20 Jul 2018 13:21:04 -0700 Subject: [PATCH 0332/1291] vxlan: fix default fdb entry netlink notify ordering during netdev create [ Upstream commit e99465b952861533d9ba748fdbecc96d9a36da3e ] Problem: In vxlan_newlink, a default fdb entry is added before register_netdev. The default fdb creation function also notifies user-space of the fdb entry on the vxlan device which user-space does not know about yet. (RTM_NEWNEIGH goes before RTM_NEWLINK for the same ifindex). This patch fixes the user-space netlink notification ordering issue with the following changes: - decouple fdb notify from fdb create. - Move fdb notify after register_netdev. - Call rtnl_configure_link in vxlan newlink handler to notify userspace about the newlink before fdb notify and hence avoiding the user-space race. Fixes: afbd8bae9c79 ("vxlan: add implicit fdb entry for default destination") Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5ee0f6d9f7bc..13d39a72fe0d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3192,6 +3192,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f = NULL; int err; err = vxlan_dev_configure(net, dev, conf, false, extack); @@ -3202,27 +3203,38 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_update(vxlan, all_zeros_mac, + err = vxlan_fdb_create(vxlan, all_zeros_mac, &vxlan->default_dst.remote_ip, NUD_REACHABLE | NUD_PERMANENT, - NLM_F_EXCL | NLM_F_CREATE, vxlan->cfg.dst_port, vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni, vxlan->default_dst.remote_ifindex, - NTF_SELF); + NTF_SELF, &f); if (err) return err; } err = register_netdevice(dev); + if (err) + goto errout; + + err = rtnl_configure_link(dev, NULL); if (err) { - vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); - return err; + unregister_netdevice(dev); + goto errout; } + /* notify default fdb entry */ + if (f) + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); + list_add(&vxlan->next, &vn->vxlan_list); return 0; +errout: + if (f) + vxlan_fdb_destroy(vxlan, f, false); + return err; } static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], @@ -3451,6 +3463,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst old_dst; struct vxlan_config conf; + struct vxlan_fdb *f = NULL; int err; err = vxlan_nl2conf(tb, data, @@ -3476,19 +3489,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_update(vxlan, all_zeros_mac, + err = vxlan_fdb_create(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, - NLM_F_CREATE | NLM_F_APPEND, vxlan->cfg.dst_port, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF); + NTF_SELF, &f); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); } spin_unlock_bh(&vxlan->hash_lock); } -- GitLab From 68c9bdfc8b425671003e37b27a400e77d288a8fb Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jul 2018 06:04:52 -0700 Subject: [PATCH 0333/1291] tcp: fix dctcp delayed ACK schedule [ Upstream commit b0c05d0e99d98d7f0cd41efc1eeec94efdc3325d ] Previously, when a data segment was sent an ACK was piggybacked on the data segment without generating a CA_EVENT_NON_DELAYED_ACK event to notify congestion control modules. So the DCTCP ca->delayed_ack_reserved flag could incorrectly stay set when in fact there were no delayed ACKs being reserved. This could result in sending a special ECN notification ACK that carries an older ACK sequence, when in fact there was no need for such an ACK. DCTCP keeps track of the delayed ACK status with its own separate state ca->delayed_ack_reserved. Previously it may accidentally cancel the delayed ACK without updating this field upon sending a special ACK that carries a older ACK sequence. This inconsistency would lead to DCTCP receiver never acknowledging the latest data until the sender times out and retry in some cases. Packetdrill script (provided by Larry Brakmo) 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < [ect0] SEW 0:0(0) win 32792 0.100 > SE. 0:0(0) ack 1 0.110 < [ect0] . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 0.200 < [ect0] . 1:1001(1000) ack 1 win 257 0.200 > [ect01] . 1:1(0) ack 1001 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 1:2(1) ack 1001 0.200 < [ect0] . 1001:2001(1000) ack 2 win 257 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 2:3(1) ack 2001 0.200 < [ect0] . 2001:3001(1000) ack 3 win 257 0.200 < [ect0] . 3001:4001(1000) ack 3 win 257 0.200 > [ect01] . 3:3(0) ack 4001 0.210 < [ce] P. 4001:4501(500) ack 3 win 257 +0.001 read(4, ..., 4500) = 4500 +0 write(4, ..., 1) = 1 +0 > [ect01] PE. 3:4(1) ack 4501 +0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257 // Previously the ACK sequence below would be 4501, causing a long RTO +0.040~+0.045 > [ect01] . 4:4(0) ack 5501 // delayed ack +0.311 < [ect0] . 5501:6501(1000) ack 4 win 257 // More data +0 > [ect01] . 4:4(0) ack 6501 // now acks everything +0.500 < F. 9501:9501(0) ack 4 win 257 Reported-by: Larry Brakmo Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_dctcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 5f5e5936760e..89f88b0d8167 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -134,7 +134,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) /* State has changed from CE=0 to CE=1 and delayed * ACK has not sent yet. */ - if (!ca->ce_state && ca->delayed_ack_reserved) { + if (!ca->ce_state && + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { u32 tmp_rcv_nxt; /* Save current rcv_nxt. */ @@ -164,7 +165,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) /* State has changed from CE=1 to CE=0 and delayed * ACK has not sent yet. */ - if (ca->ce_state && ca->delayed_ack_reserved) { + if (ca->ce_state && + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { u32 tmp_rcv_nxt; /* Save current rcv_nxt. */ -- GitLab From f7f24b36938367d6e97d43f7dd53f0c5714cce1a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:34 -0700 Subject: [PATCH 0334/1291] tcp: helpers to send special DCTCP ack [ Upstream commit 2987babb6982306509380fc11b450227a844493b ] Refactor and create helpers to send the special ACK in DCTCP. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index abae5196cd3a..c8da25f37ae4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -984,8 +984,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) * We are working here with either a clone of the original * SKB, or a fresh unique copy made by the retransmit engine. */ -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - gfp_t gfp_mask) +static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, + int clone_it, gfp_t gfp_mask, u32 rcv_nxt) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -1057,7 +1057,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->source = inet->inet_sport; th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); - th->ack_seq = htonl(tp->rcv_nxt); + th->ack_seq = htonl(rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->tcp_flags); @@ -1135,6 +1135,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, return err; } +static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) +{ + return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, + tcp_sk(sk)->rcv_nxt); +} + /* This routine just queues the buffer for sending. * * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, @@ -3551,7 +3558,7 @@ void tcp_send_delayed_ack(struct sock *sk) } /* This routine sends an ack and also updates the window. */ -void tcp_send_ack(struct sock *sk) +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) { struct sk_buff *buff; @@ -3586,7 +3593,12 @@ void tcp_send_ack(struct sock *sk) skb_set_tcp_pure_ack(buff); /* Send it off, this clears delayed acks for us. */ - tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); + __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); +} + +void tcp_send_ack(struct sock *sk) +{ + __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } EXPORT_SYMBOL_GPL(tcp_send_ack); -- GitLab From 78636179f6e69f71f2d16e8716c096f297be9356 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:35 -0700 Subject: [PATCH 0335/1291] tcp: do not cancel delay-AcK on DCTCP special ACK [ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ] Currently when a DCTCP receiver delays an ACK and receive a data packet with a different CE mark from the previous one's, it sends two immediate ACKs acking previous and latest sequences respectly (for ECN accounting). Previously sending the first ACK may mark off the delayed ACK timer (tcp_event_ack_sent). This may subsequently prevent sending the second ACK to acknowledge the latest sequence (tcp_ack_snd_check). The culprit is that tcp_send_ack() assumes it always acknowleges the latest sequence, which is not true for the first special ACK. The fix is to not make the assumption in tcp_send_ack and check the actual ack sequence before cancelling the delayed ACK. Further it's safer to pass the ack sequence number as a local variable into tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid future bugs like this. Reported-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 1 + net/ipv4/tcp_dctcp.c | 34 ++++------------------------------ net/ipv4/tcp_output.c | 11 ++++++++--- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b49f3aafed7..62af4ae27430 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -560,6 +560,7 @@ void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 89f88b0d8167..39d96d27ff94 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) * ACK has not sent yet. */ if (!ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=0. */ - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; - } + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 1; @@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) * ACK has not sent yet. */ if (ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=1. */ - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; - } + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c8da25f37ae4..3d8f6f342cb1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -175,8 +175,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, + u32 rcv_nxt) { + struct tcp_sock *tp = tcp_sk(sk); + + if (unlikely(rcv_nxt != tp->rcv_nxt)) + return; /* Special ACK sent by DCTCP to reflect ECN */ tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1098,7 +1103,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); + tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); @@ -3595,12 +3600,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) /* Send it off, this clears delayed acks for us. */ __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); } +EXPORT_SYMBOL_GPL(__tcp_send_ack); void tcp_send_ack(struct sock *sk) { __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } -EXPORT_SYMBOL_GPL(tcp_send_ack); /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. -- GitLab From ae70b61531979a4caaed63be4f19b76fd58e7932 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:36 -0700 Subject: [PATCH 0336/1291] tcp: do not delay ACK in DCTCP upon CE status change [ Upstream commit a0496ef2c23b3b180902dd185d0d63ccbc624cf8 ] Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change has to be sent immediately so the sender can respond quickly: """ When receiving packets, the CE codepoint MUST be processed as follows: 1. If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to true and send an immediate ACK. 2. If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE to false and send an immediate ACK. """ Previously DCTCP implementation may continue to delay the ACK. This patch fixes that to implement the RFC by forcing an immediate ACK. Tested with this packetdrill script provided by Larry Brakmo 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < [ect0] SEW 0:0(0) win 32792 0.100 > SE. 0:0(0) ack 1 0.110 < [ect0] . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0 0.200 < [ect0] . 1:1001(1000) ack 1 win 257 0.200 > [ect01] . 1:1(0) ack 1001 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 1:2(1) ack 1001 0.200 < [ect0] . 1001:2001(1000) ack 2 win 257 +0.005 < [ce] . 2001:3001(1000) ack 2 win 257 +0.000 > [ect01] . 2:2(0) ack 2001 // Previously the ACK below would be delayed by 40ms +0.000 > [ect01] E. 2:2(0) ack 3001 +0.500 < F. 9501:9501(0) ack 4 win 257 Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 1 + net/ipv4/tcp_dctcp.c | 30 ++++++++++++++++++------------ net/ipv4/tcp_input.c | 3 ++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 62af4ae27430..3173dd12b8cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -372,6 +372,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +void tcp_enter_quickack_mode(struct sock *sk); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 39d96d27ff94..c78fb53988a1 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -131,12 +131,15 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=0 to CE=1 and delayed - * ACK has not sent yet. - */ - if (!ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) - __tcp_send_ack(sk, ca->prior_rcv_nxt); + if (!ca->ce_state) { + /* State has changed from CE=0 to CE=1, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk); + } ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 1; @@ -149,12 +152,15 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=1 to CE=0 and delayed - * ACK has not sent yet. - */ - if (ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) - __tcp_send_ack(sk, ca->prior_rcv_nxt); + if (ca->ce_state) { + /* State has changed from CE=1 to CE=0, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk); + } ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5711b1b12d28..03fd2ff4007b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -209,13 +209,14 @@ static void tcp_incr_quickack(struct sock *sk) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -static void tcp_enter_quickack_mode(struct sock *sk) +void tcp_enter_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk); icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } +EXPORT_SYMBOL(tcp_enter_quickack_mode); /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. -- GitLab From f3a5ba6310e11df370f6888ed716d1486896d983 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:17 -0700 Subject: [PATCH 0337/1291] tcp: free batches of packets in tcp_prune_ofo_queue() [ Upstream commit 72cd43ba64fc172a443410ce01645895850844c8 ] Juha-Matti Tilli reported that malicious peers could inject tiny packets in out_of_order_queue, forcing very expensive calls to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for every incoming packet. out_of_order_queue rb-tree can contain thousands of nodes, iterating over all of them is not nice. Before linux-4.9, we would have pruned all packets in ofo_queue in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB. Since we plan to increase tcp_rmem[2] in the future to cope with modern BDP, can not revert to the old behavior, without great pain. Strategy taken in this patch is to purge ~12.5 % of the queue capacity. Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets") Signed-off-by: Eric Dumazet Reported-by: Juha-Matti Tilli Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 2 ++ net/ipv4/tcp_input.c | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9cf971c68401..6dd77767fd5b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3167,6 +3167,8 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) return __skb_grow(skb, len); } +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 03fd2ff4007b..1e7175bd3b59 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4924,6 +4924,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) * 2) not add too big latencies if thousands of packets sit there. * (But if application shrinks SO_RCVBUF, we could still end up * freeing whole queue here) + * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks. * * Return true if queue has shrunk. */ @@ -4931,20 +4932,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + int goal; if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; do { prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); + goal -= rb_to_skb(node)->truesize; tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); - sk_mem_reclaim(sk); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !tcp_under_memory_pressure(sk)) - break; + if (!prev || goal <= 0) { + sk_mem_reclaim(sk); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; + goal = sk->sk_rcvbuf >> 3; + } node = prev; } while (node); tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); -- GitLab From 81e6b01d1c10015811f52bf04ec125bdb291b4b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:18 -0700 Subject: [PATCH 0338/1291] tcp: avoid collapses in tcp_prune_queue() if possible [ Upstream commit f4a3313d8e2ca9fd8d8f45e40a2903ba782607e7 ] Right after a TCP flow is created, receiving tiny out of order packets allways hit the condition : if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); tcp_clamp_window() increases sk_rcvbuf to match sk_rmem_alloc (guarded by tcp_rmem[2]) Calling tcp_collapse_ofo_queue() in this case is not useful, and offers a O(N^2) surface attack to malicious peers. Better not attempt anything before full queue capacity is reached, forcing attacker to spend lots of resource and allow us to more easily detect the abuse. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e7175bd3b59..023d2b016f5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4986,6 +4986,9 @@ static int tcp_prune_queue(struct sock *sk) else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, NULL, -- GitLab From 6285a74a536f1ee807488436547cc17cda3306d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:19 -0700 Subject: [PATCH 0339/1291] tcp: detect malicious patterns in tcp_collapse_ofo_queue() [ Upstream commit 3d4bf93ac12003f9b8e1e2de37fe27983deebdcf ] In case an attacker feeds tiny packets completely out of order, tcp_collapse_ofo_queue() might scan the whole rb-tree, performing expensive copies, but not changing socket memory usage at all. 1) Do not attempt to collapse tiny skbs. 2) Add logic to exit early when too many tiny skbs are detected. We prefer not doing aggressive collapsing (which copies packets) for pathological flows, and revert to tcp_prune_ofo_queue() which will be less expensive. In the future, we might add the possibility of terminating flows that are proven to be malicious. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 023d2b016f5d..6f09c45dc20c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4877,6 +4877,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 range_truesize, sum_tiny = 0; struct sk_buff *skb, *head; struct rb_node *p; u32 start, end; @@ -4895,6 +4896,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; for (head = skb;;) { skb = tcp_skb_next(skb, NULL); @@ -4905,11 +4907,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk) if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, NULL, &tp->out_of_order_queue, - head, skb, start, end); + /* Do not attempt collapsing tiny skbs */ + if (range_truesize != head->truesize || + end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + tcp_collapse(sk, NULL, &tp->out_of_order_queue, + head, skb, start, end); + } else { + sum_tiny += range_truesize; + if (sum_tiny > sk->sk_rcvbuf >> 3) + return; + } goto new_range; } + range_truesize += skb->truesize; if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) start = TCP_SKB_CB(skb)->seq; if (after(TCP_SKB_CB(skb)->end_seq, end)) -- GitLab From ec645ae62309a85522c2bc8f700afc6e152e62b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:20 -0700 Subject: [PATCH 0340/1291] tcp: call tcp_drop() from tcp_data_queue_ofo() [ Upstream commit 8541b21e781a22dce52a74fef0b9bed00404a1cd ] In order to be able to give better diagnostics and detect malicious traffic, we need to have better sk->sk_drops tracking. Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6f09c45dc20c..35b0e38fe31a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4492,7 +4492,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* All the bits are present. Drop. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4511,7 +4511,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + tcp_drop(sk, skb1); goto merge_right; } } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, -- GitLab From 22e3d3178b18115ba60cae7c968a67718f070da0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:21 -0700 Subject: [PATCH 0341/1291] tcp: add tcp_ooo_try_coalesce() helper [ Upstream commit 58152ecbbcc6a0ce7fddd5bf5f6ee535834ece0c ] In case skb in out_or_order_queue is the result of multiple skbs coalescing, we would like to get a proper gso_segs counter tracking, so that future tcp_drop() can report an accurate number. I chose to not implement this tracking for skbs in receive queue, since they are not dropped, unless socket is disconnected. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 35b0e38fe31a..b86e7b8beb1d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4332,6 +4332,23 @@ static bool tcp_try_coalesce(struct sock *sk, return true; } +static bool tcp_ooo_try_coalesce(struct sock *sk, + struct sk_buff *to, + struct sk_buff *from, + bool *fragstolen) +{ + bool res = tcp_try_coalesce(sk, OOO_QUEUE, to, from, fragstolen); + + /* In case tcp_drop() is called later, update to->gso_segs */ + if (res) { + u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) + + max_t(u16, 1, skb_shinfo(from)->gso_segs); + + skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF); + } + return res; +} + static void tcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -4463,8 +4480,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, - skb, &fragstolen)) { + if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4514,8 +4531,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_drop(sk, skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, - skb, &fragstolen)) { + } else if (tcp_ooo_try_coalesce(sk, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; -- GitLab From ab9489c4db894cc60f30430f80464eec8ccdf066 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 13 Jul 2018 00:29:36 +0200 Subject: [PATCH 0342/1291] staging: speakup: fix wraparound in uaccess length check commit b96fba8d5855c3617adbfb43ca4723a808cac954 upstream. If softsynthx_read() is called with `count < 3`, `count - 3` wraps, causing the loop to copy as much data as available to the provided buffer. If softsynthx_read() is invoked through sys_splice(), this causes an unbounded kernel write; but even when userspace just reads from it normally, a small size could cause userspace crashes. Fixes: 425e586cf95b ("speakup: add unicode variant of /dev/softsynth") Cc: stable@vger.kernel.org Signed-off-by: Samuel Thibault Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/speakup_soft.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index d99daf69e501..fe229d63deec 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -207,11 +207,15 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count, int chars_sent = 0; char __user *cp; char *init; + size_t bytes_per_ch = unicode ? 3 : 1; u16 ch; int empty; unsigned long flags; DEFINE_WAIT(wait); + if (count < bytes_per_ch) + return -EINVAL; + spin_lock_irqsave(&speakup_info.spinlock, flags); while (1) { prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE); @@ -237,7 +241,7 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count, init = get_initstring(); /* Keep 3 bytes available for a 16bit UTF-8-encoded character */ - while (chars_sent <= count - 3) { + while (chars_sent <= count - bytes_per_ch) { if (speakup_info.flushing) { speakup_info.flushing = 0; ch = '\x18'; -- GitLab From e089c305af4903793dae85dc9d9b706e0d17bfb3 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 10 Jul 2018 08:28:49 +0200 Subject: [PATCH 0343/1291] usb: cdc_acm: Add quirk for Castles VEGA3000 commit 1445cbe476fc3dd09c0b380b206526a49403c071 upstream. The device (a POS terminal) implements CDC ACM, but has not union descriptor. Signed-off-by: Lubomir Rintel Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 3b9aadd007f5..f2f31fc16f29 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1844,6 +1844,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ }, + { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, -- GitLab From ac3f65c6b6351b729be072debc44cd28a22e0d0e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 19 Jul 2018 14:39:37 -0500 Subject: [PATCH 0344/1291] usb: core: handle hub C_PORT_OVER_CURRENT condition commit 249a32b7eeb3edb6897dd38f89651a62163ac4ed upstream. Based on USB2.0 Spec Section 11.12.5, "If a hub has per-port power switching and per-port current limiting, an over-current on one port may still cause the power on another port to fall below specific minimums. In this case, the affected port is placed in the Power-Off state and C_PORT_OVER_CURRENT is set for the port, but PORT_OVER_CURRENT is not set." so let's check C_PORT_OVER_CURRENT too for over current condition. Fixes: 08d1dec6f405 ("usb:hub set hub->change_bits when over-current happens") Cc: Tested-by: Alessandro Antenucci Signed-off-by: Bin Liu Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index e5f77e611451..a8bc48b26c23 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1141,10 +1141,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) if (!udev || udev->state == USB_STATE_NOTATTACHED) { /* Tell hub_wq to disconnect the device or - * check for a new connection + * check for a new connection or over current condition. + * Based on USB2.0 Spec Section 11.12.5, + * C_PORT_OVER_CURRENT could be set while + * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || - (portstatus & USB_PORT_STAT_OVERCURRENT)) + (portstatus & USB_PORT_STAT_OVERCURRENT) || + (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); } else if (portstatus & USB_PORT_STAT_ENABLE) { -- GitLab From 68fc92a0f3913d539d1ac68a861f895e34099e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antti=20Sepp=C3=A4l=C3=A4?= Date: Thu, 5 Jul 2018 17:31:53 +0300 Subject: [PATCH 0345/1291] usb: dwc2: Fix DMA alignment to start at allocated boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56406e017a883b54b339207b230f85599f4d70ae upstream. The commit 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way") introduced a common way to align DMA allocations. The code in the commit aligns the struct dma_aligned_buffer but the actual DMA address pointed by data[0] gets aligned to an offset from the allocated boundary by the kmalloc_ptr and the old_xfer_buffer pointers. This is against the recommendation in Documentation/DMA-API.txt which states: Therefore, it is recommended that driver writers who don't take special care to determine the cache line size at run time only map virtual regions that begin and end on page boundaries (which are guaranteed also to be cache line boundaries). The effect of this is that architectures with non-coherent DMA caches may run into memory corruption or kernel crashes with Unhandled kernel unaligned accesses exceptions. Fix the alignment by positioning the DMA area in front of the allocation and use memory at the end of the area for storing the orginal transfer_buffer pointer. This may have the added benefit of increased performance as the DMA area is now fully aligned on all architectures. Tested with Lantiq xRX200 (MIPS) and RPi Model B Rev 2 (ARM). Fixes: 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way") Cc: Reviewed-by: Douglas Anderson Signed-off-by: Antti Seppälä Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd.c | 44 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 87484f71b2ab..46d3b0fc00c5 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2606,34 +2606,29 @@ static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg, #define DWC2_USB_DMA_ALIGN 4 -struct dma_aligned_buffer { - void *kmalloc_ptr; - void *old_xfer_buffer; - u8 data[0]; -}; - static void dwc2_free_dma_aligned_buffer(struct urb *urb) { - struct dma_aligned_buffer *temp; + void *stored_xfer_buffer; if (!(urb->transfer_flags & URB_ALIGNED_TEMP_BUFFER)) return; - temp = container_of(urb->transfer_buffer, - struct dma_aligned_buffer, data); + /* Restore urb->transfer_buffer from the end of the allocated area */ + memcpy(&stored_xfer_buffer, urb->transfer_buffer + + urb->transfer_buffer_length, sizeof(urb->transfer_buffer)); if (usb_urb_dir_in(urb)) - memcpy(temp->old_xfer_buffer, temp->data, + memcpy(stored_xfer_buffer, urb->transfer_buffer, urb->transfer_buffer_length); - urb->transfer_buffer = temp->old_xfer_buffer; - kfree(temp->kmalloc_ptr); + kfree(urb->transfer_buffer); + urb->transfer_buffer = stored_xfer_buffer; urb->transfer_flags &= ~URB_ALIGNED_TEMP_BUFFER; } static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags) { - struct dma_aligned_buffer *temp, *kmalloc_ptr; + void *kmalloc_ptr; size_t kmalloc_size; if (urb->num_sgs || urb->sg || @@ -2641,22 +2636,29 @@ static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags) !((uintptr_t)urb->transfer_buffer & (DWC2_USB_DMA_ALIGN - 1))) return 0; - /* Allocate a buffer with enough padding for alignment */ + /* + * Allocate a buffer with enough padding for original transfer_buffer + * pointer. This allocation is guaranteed to be aligned properly for + * DMA + */ kmalloc_size = urb->transfer_buffer_length + - sizeof(struct dma_aligned_buffer) + DWC2_USB_DMA_ALIGN - 1; + sizeof(urb->transfer_buffer); kmalloc_ptr = kmalloc(kmalloc_size, mem_flags); if (!kmalloc_ptr) return -ENOMEM; - /* Position our struct dma_aligned_buffer such that data is aligned */ - temp = PTR_ALIGN(kmalloc_ptr + 1, DWC2_USB_DMA_ALIGN) - 1; - temp->kmalloc_ptr = kmalloc_ptr; - temp->old_xfer_buffer = urb->transfer_buffer; + /* + * Position value of original urb->transfer_buffer pointer to the end + * of allocation for later referencing + */ + memcpy(kmalloc_ptr + urb->transfer_buffer_length, + &urb->transfer_buffer, sizeof(urb->transfer_buffer)); + if (usb_urb_dir_out(urb)) - memcpy(temp->data, urb->transfer_buffer, + memcpy(kmalloc_ptr, urb->transfer_buffer, urb->transfer_buffer_length); - urb->transfer_buffer = temp->data; + urb->transfer_buffer = kmalloc_ptr; urb->transfer_flags |= URB_ALIGNED_TEMP_BUFFER; -- GitLab From 5421694d8cd76e258a3aa2923794f772f2e6a4de Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 2 Jul 2018 12:48:08 -0700 Subject: [PATCH 0346/1291] usb: gadget: f_fs: Only return delayed status when len is 0 commit 4d644abf25698362bd33d17c9ddc8f7122c30f17 upstream. Commit 1b9ba000 ("Allow function drivers to pause control transfers") states that USB_GADGET_DELAYED_STATUS is only supported if data phase is 0 bytes. It seems that when the length is not 0 bytes, there is no need to explicitly delay the data stage since the transfer is not completed until the user responds. However, when the length is 0, there is no data stage and the transfer is finished once setup() returns, hence there is a need to explicitly delay completion. This manifests as the following bugs: Prior to 946ef68ad4e4 ('Let setup() return USB_GADGET_DELAYED_STATUS'), when setup is 0 bytes, ffs would require user to queue a 0 byte request in order to clear setup state. However, that 0 byte request was actually not needed and would hang and cause errors in other setup requests. After the above commit, 0 byte setups work since the gadget now accepts empty queues to ep0 to clear the delay, but all other setups hang. Fixes: 946ef68ad4e4 ("Let setup() return USB_GADGET_DELAYED_STATUS") Signed-off-by: Jerry Zhang Cc: stable Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7b53ac548b1a..52e6897fa35a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3243,7 +3243,7 @@ static int ffs_func_setup(struct usb_function *f, __ffs_event_add(ffs, FUNCTIONFS_SETUP); spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); - return USB_GADGET_DELAYED_STATUS; + return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; } static bool ffs_func_req_match(struct usb_function *f, -- GitLab From 55cb8f40c8d7ee1adb1575257cf739be26561e9e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Jul 2018 14:51:33 +0200 Subject: [PATCH 0347/1291] driver core: Partially revert "driver core: correct device's shutdown order" commit 722e5f2b1eec7de61117b7c0a7914761e3da2eda upstream. Commit 52cdbdd49853 (driver core: correct device's shutdown order) introduced a regression by breaking device shutdown on some systems. Namely, the devices_kset_move_last() call in really_probe() added by that commit is a mistake as it may cause parents to follow children in the devices_kset list which then causes shutdown to fail. For example, if a device has children before really_probe() is called for it (which is not uncommon), that call will cause it to be reordered after the children in the devices_kset list and the ordering of that list will not reflect the correct device shutdown order any more. Also it causes the devices_kset list to be constantly reordered until all drivers have been probed which is totally pointless overhead in the majority of cases and it only covered an issue with system shutdown, while system-wide suspend/resume potentially had the same issue on the affected platforms (which was not covered). Moreover, the shutdown issue originally addressed by the change in really_probe() made by commit 52cdbdd49853 is not present in 4.18-rc any more, since dra7 started to use the sdhci-omap driver which doesn't disable any regulators during shutdown, so the really_probe() part of commit 52cdbdd49853 can be safely reverted. [The original issue was related to the omap_hsmmc driver used by dra7 previously.] For the above reasons, revert the really_probe() modifications made by commit 52cdbdd49853. The other code changes made by commit 52cdbdd49853 are useful and they need not be reverted. Fixes: 52cdbdd49853 (driver core: correct device's shutdown order) Link: https://lore.kernel.org/lkml/CAFgQCTt7VfqM=UyCnvNFxrSw8Z6cUtAi3HUwR4_xPAc03SgHjQ@mail.gmail.com/ Reported-by: Pingfan Liu Tested-by: Pingfan Liu Reviewed-by: Kishon Vijay Abraham I Signed-off-by: Rafael J. Wysocki Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index ad44b40fe284..55fc31f6fe7f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -401,14 +401,6 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto probe_failed; } - /* - * Ensure devices are listed in devices_kset in correct order - * It's important to move Dev to the end of devices_kset before - * calling .probe, because it could be recursive and parent Dev - * should always go first - */ - devices_kset_move_last(dev); - if (dev->bus->probe) { ret = dev->bus->probe(dev); if (ret) -- GitLab From 464a3f9139f4ebea238f15db83705eac9561d2bc Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 7 Feb 2017 17:01:14 +0200 Subject: [PATCH 0348/1291] can: xilinx_can: fix RX loop if RXNEMP is asserted without RXOK commit 32852c561bffd613d4ed7ec464b1e03e1b7b6c5c upstream. If the device gets into a state where RXNEMP (RX FIFO not empty) interrupt is asserted without RXOK (new frame received successfully) interrupt being asserted, xcan_rx_poll() will continue to try to clear RXNEMP without actually reading frames from RX FIFO. If the RX FIFO is not empty, the interrupt will not be cleared and napi_schedule() will just be called again. This situation can occur when: (a) xcan_rx() returns without reading RX FIFO due to an error condition. The code tries to clear both RXOK and RXNEMP but RXNEMP will not clear due to a frame still being in the FIFO. The frame will never be read from the FIFO as RXOK is no longer set. (b) A frame is received between xcan_rx_poll() reading interrupt status and clearing RXOK. RXOK will be cleared, but RXNEMP will again remain set as the new message is still in the FIFO. I'm able to trigger case (b) by flooding the bus with frames under load. There does not seem to be any benefit in using both RXNEMP and RXOK in the way the driver does, and the polling example in the reference manual (UG585 v1.10 18.3.7 Read Messages from RxFIFO) also says that either RXOK or RXNEMP can be used for detecting incoming messages. Fix the issue and simplify the RX processing by only using RXNEMP without RXOK. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 89aec07c225f..05ea2820d27b 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -101,7 +101,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK) + XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN___SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ @@ -709,15 +709,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) { - if (isr & XCAN_IXR_RXOK_MASK) { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXOK_MASK); - work_done += xcan_rx(ndev); - } else { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXNEMP_MASK); - break; - } + work_done += xcan_rx(ndev); priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK); isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } @@ -728,7 +720,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) if (work_done < quota) { napi_complete_done(napi, work_done); ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK); + ier |= XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); } return work_done; @@ -800,9 +792,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) } /* Check for the type of receive interrupt and Processing it */ - if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) { + if (isr & XCAN_IXR_RXNEMP_MASK) { ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK); + ier &= ~XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); napi_schedule(&priv->napi); } -- GitLab From f820de2a08b65bbe84372108c1975737f0d5d044 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 17 May 2018 15:41:19 +0300 Subject: [PATCH 0349/1291] can: xilinx_can: fix power management handling commit 8ebd83bdb027f29870d96649dba18b91581ea829 upstream. There are several issues with the suspend/resume handling code of the driver: - The device is attached and detached in the runtime_suspend() and runtime_resume() callbacks if the interface is running. However, during xcan_chip_start() the interface is considered running, causing the resume handler to incorrectly call netif_start_queue() at the beginning of xcan_chip_start(), and on xcan_chip_start() error return the suspend handler detaches the device leaving the user unable to bring-up the device anymore. - The device is not brought properly up on system resume. A reset is done and the code tries to determine the bus state after that. However, after reset the device is always in Configuration mode (down), so the state checking code does not make sense and communication will also not work. - The suspend callback tries to set the device to sleep mode (low-power mode which monitors the bus and brings the device back to normal mode on activity), but then immediately disables the clocks (possibly before the device reaches the sleep mode), which does not make sense to me. If a clean shutdown is wanted before disabling clocks, we can just bring it down completely instead of only sleep mode. Reorganize the PM code so that only the clock logic remains in the runtime PM callbacks and the system PM callbacks contain the device bring-up/down logic. This makes calling the runtime PM callbacks during e.g. xcan_chip_start() safe. The system PM callbacks now simply call common code to start/stop the HW if the interface was running, replacing the broken code from before. xcan_chip_stop() is updated to use the common reset code so that it will wait for the reset to complete. Reset also disables all interrupts so do not do that separately. Also, the device_may_wakeup() checks are removed as the driver does not have wakeup support. Tested on Zynq-7000 integrated CAN. Signed-off-by: Anssi Hannula Cc: Michal Simek Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 69 +++++++++++++++--------------------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 05ea2820d27b..88446ae11815 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -811,13 +811,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) static void xcan_chip_stop(struct net_device *ndev) { struct xcan_priv *priv = netdev_priv(ndev); - u32 ier; /* Disable interrupts and leave the can in configuration mode */ - ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~XCAN_INTR_ALL; - priv->write_reg(priv, XCAN_IER_OFFSET, ier); - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); + set_reset_mode(ndev); priv->can.state = CAN_STATE_STOPPED; } @@ -950,10 +946,15 @@ static const struct net_device_ops xcan_netdev_ops = { */ static int __maybe_unused xcan_suspend(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_suspend(dev); + struct net_device *ndev = dev_get_drvdata(dev); - return 0; + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + xcan_chip_stop(ndev); + } + + return pm_runtime_force_suspend(dev); } /** @@ -965,11 +966,27 @@ static int __maybe_unused xcan_suspend(struct device *dev) */ static int __maybe_unused xcan_resume(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_resume(dev); + struct net_device *ndev = dev_get_drvdata(dev); + int ret; - return 0; + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } + + if (netif_running(ndev)) { + ret = xcan_chip_start(ndev); + if (ret) { + dev_err(dev, "xcan_chip_start failed on resume\n"); + return ret; + } + + netif_device_attach(ndev); + netif_start_queue(ndev); + } + return 0; } /** @@ -984,14 +1001,6 @@ static int __maybe_unused xcan_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } - - priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK); - priv->can.state = CAN_STATE_SLEEPING; - clk_disable_unprepare(priv->bus_clk); clk_disable_unprepare(priv->can_clk); @@ -1010,7 +1019,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); int ret; - u32 isr, status; ret = clk_prepare_enable(priv->bus_clk); if (ret) { @@ -1024,27 +1032,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) return ret; } - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); - status = priv->read_reg(priv, XCAN_SR_OFFSET); - - if (netif_running(ndev)) { - if (isr & XCAN_IXR_BSOFF_MASK) { - priv->can.state = CAN_STATE_BUS_OFF; - priv->write_reg(priv, XCAN_SRR_OFFSET, - XCAN_SRR_RESET_MASK); - } else if ((status & XCAN_SR_ESTAT_MASK) == - XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - } else { - priv->can.state = CAN_STATE_ERROR_ACTIVE; - } - netif_device_attach(ndev); - netif_start_queue(ndev); - } - return 0; } -- GitLab From c5846b2fd57b593180210f74d2f5e937f9e421cc Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Wed, 8 Feb 2017 13:13:40 +0200 Subject: [PATCH 0350/1291] can: xilinx_can: fix recovery from error states not being propagated commit 877e0b75947e2c7acf5624331bb17ceb093c98ae upstream. The xilinx_can driver contains no mechanism for propagating recovery from CAN_STATE_ERROR_WARNING and CAN_STATE_ERROR_PASSIVE. Add such a mechanism by factoring the handling of XCAN_STATE_ERROR_PASSIVE and XCAN_STATE_ERROR_WARNING out of xcan_err_interrupt and checking for recovery after RX and TX if the interface is in one of those states. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 155 ++++++++++++++++++++++++++++------- 1 file changed, 127 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 88446ae11815..cc954a794f5a 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -2,6 +2,7 @@ * * Copyright (C) 2012 - 2014 Xilinx, Inc. * Copyright (C) 2009 PetaLogix. All rights reserved. + * Copyright (C) 2017 Sandvik Mining and Construction Oy * * Description: * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. @@ -529,6 +530,123 @@ static int xcan_rx(struct net_device *ndev) return 1; } +/** + * xcan_current_error_state - Get current error state from HW + * @ndev: Pointer to net_device structure + * + * Checks the current CAN error state from the HW. Note that this + * only checks for ERROR_PASSIVE and ERROR_WARNING. + * + * Return: + * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE + * otherwise. + */ +static enum can_state xcan_current_error_state(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 status = priv->read_reg(priv, XCAN_SR_OFFSET); + + if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) + return CAN_STATE_ERROR_PASSIVE; + else if (status & XCAN_SR_ERRWRN_MASK) + return CAN_STATE_ERROR_WARNING; + else + return CAN_STATE_ERROR_ACTIVE; +} + +/** + * xcan_set_error_state - Set new CAN error state + * @ndev: Pointer to net_device structure + * @new_state: The new CAN state to be set + * @cf: Error frame to be populated or NULL + * + * Set new CAN error state for the device, updating statistics and + * populating the error frame if given. + */ +static void xcan_set_error_state(struct net_device *ndev, + enum can_state new_state, + struct can_frame *cf) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET); + u32 txerr = ecr & XCAN_ECR_TEC_MASK; + u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT; + + priv->can.state = new_state; + + if (cf) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + + switch (new_state) { + case CAN_STATE_ERROR_PASSIVE: + priv->can.can_stats.error_passive++; + if (cf) + cf->data[1] = (rxerr > 127) ? + CAN_ERR_CRTL_RX_PASSIVE : + CAN_ERR_CRTL_TX_PASSIVE; + break; + case CAN_STATE_ERROR_WARNING: + priv->can.can_stats.error_warning++; + if (cf) + cf->data[1] |= (txerr > rxerr) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + break; + case CAN_STATE_ERROR_ACTIVE: + if (cf) + cf->data[1] |= CAN_ERR_CRTL_ACTIVE; + break; + default: + /* non-ERROR states are handled elsewhere */ + WARN_ON(1); + break; + } +} + +/** + * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX + * @ndev: Pointer to net_device structure + * + * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if + * the performed RX/TX has caused it to drop to a lesser state and set + * the interface state accordingly. + */ +static void xcan_update_error_state_after_rxtx(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + enum can_state old_state = priv->can.state; + enum can_state new_state; + + /* changing error state due to successful frame RX/TX can only + * occur from these states + */ + if (old_state != CAN_STATE_ERROR_WARNING && + old_state != CAN_STATE_ERROR_PASSIVE) + return; + + new_state = xcan_current_error_state(ndev); + + if (new_state != old_state) { + struct sk_buff *skb; + struct can_frame *cf; + + skb = alloc_can_err_skb(ndev, &cf); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); + + if (skb) { + struct net_device_stats *stats = &ndev->stats; + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } + } +} + /** * xcan_err_interrupt - error frame Isr * @ndev: net_device pointer @@ -544,16 +662,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; - u32 err_status, status, txerr = 0, rxerr = 0; + u32 err_status; skb = alloc_can_err_skb(ndev, &cf); err_status = priv->read_reg(priv, XCAN_ESR_OFFSET); priv->write_reg(priv, XCAN_ESR_OFFSET, err_status); - txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; - rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & - XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); - status = priv->read_reg(priv, XCAN_SR_OFFSET); if (isr & XCAN_IXR_BSOFF_MASK) { priv->can.state = CAN_STATE_BUS_OFF; @@ -563,28 +677,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) can_bus_off(ndev); if (skb) cf->can_id |= CAN_ERR_BUSOFF; - } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - priv->can.can_stats.error_passive++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (rxerr > 127) ? - CAN_ERR_CRTL_RX_PASSIVE : - CAN_ERR_CRTL_TX_PASSIVE; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - priv->can.can_stats.error_warning++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] |= (txerr > rxerr) ? - CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } + } else { + enum can_state new_state = xcan_current_error_state(ndev); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); } /* Check for Arbitration lost interrupt */ @@ -714,8 +810,10 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } - if (work_done) + if (work_done) { can_led_event(ndev, CAN_LED_EVENT_RX); + xcan_update_error_state_after_rxtx(ndev); + } if (work_done < quota) { napi_complete_done(napi, work_done); @@ -746,6 +844,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } can_led_event(ndev, CAN_LED_EVENT_TX); + xcan_update_error_state_after_rxtx(ndev); netif_wake_queue(ndev); } -- GitLab From 96bf3257c86655b7f6bac8566d916f25cd3aa946 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 7 Feb 2017 13:23:04 +0200 Subject: [PATCH 0351/1291] can: xilinx_can: fix device dropping off bus on RX overrun commit 2574fe54515ed3487405de329e4e9f13d7098c10 upstream. The xilinx_can driver performs a software reset when an RX overrun is detected. This causes the device to enter Configuration mode where no messages are received or transmitted. The documentation does not mention any need to perform a reset on an RX overrun, and testing by inducing an RX overflow also indicated that the device continues to work just fine without a reset. Remove the software reset. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index cc954a794f5a..8a94489aa125 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -696,7 +696,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) if (isr & XCAN_IXR_RXOFLW_MASK) { stats->rx_over_errors++; stats->rx_errors++; - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); if (skb) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; -- GitLab From 189c7890f33b21764289e05dde76a196ebf64587 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 23 Feb 2017 14:50:03 +0200 Subject: [PATCH 0352/1291] can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting commit 620050d9c2be15c47017ba95efe59e0832e99a56 upstream. The xilinx_can driver assumes that the TXOK interrupt only clears after it has been acknowledged as many times as there have been successfully sent frames. However, the documentation does not mention such behavior, instead saying just that the interrupt is cleared when the clear bit is set. Similarly, testing seems to also suggest that it is immediately cleared regardless of the amount of frames having been sent. Performing some heavy TX load and then going back to idle has the tx_head drifting further away from tx_tail over time, steadily reducing the amount of frames the driver keeps in the TX FIFO (but not to zero, as the TXOK interrupt always frees up space for 1 frame from the driver's perspective, so frames continue to be sent) and delaying the local echo frames. The TX FIFO tracking is also otherwise buggy as it does not account for TX FIFO being cleared after software resets, causing BUG!, TX FIFO full when queue awake! messages to be output. There does not seem to be any way to accurately track the state of the TX FIFO for local echo support while using the full TX FIFO. The Zynq version of the HW (but not the soft-AXI version) has watermark programming support and with it an additional TX-FIFO-empty interrupt bit. Modify the driver to only put 1 frame into TX FIFO at a time on soft-AXI and 2 frames at a time on Zynq. On Zynq the TXFEMP interrupt bit is used to detect whether 1 or 2 frames have been sent at interrupt processing time. Tested with the integrated CAN on Zynq-7000 SoC. The 1-frame-FIFO mode was also tested. An alternative way to solve this would be to drop local echo support but keep using the full TX FIFO. v2: Add FIFO space check before TX queue wake with locking to synchronize with queue stop. This avoids waking the queue when xmit() had just filled it. v3: Keep local echo support and reduce the amount of frames in FIFO instead as suggested by Marc Kleine-Budde. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 139 +++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 8a94489aa125..63f4d3eddb0f 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -26,8 +26,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -119,6 +121,7 @@ enum xcan_reg { /** * struct xcan_priv - This definition define CAN driver instance * @can: CAN private data structure. + * @tx_lock: Lock for synchronizing TX interrupt handling * @tx_head: Tx CAN packets ready to send on the queue * @tx_tail: Tx CAN packets successfully sended on the queue * @tx_max: Maximum number packets the driver can send @@ -133,6 +136,7 @@ enum xcan_reg { */ struct xcan_priv { struct can_priv can; + spinlock_t tx_lock; unsigned int tx_head; unsigned int tx_tail; unsigned int tx_max; @@ -160,6 +164,11 @@ static const struct can_bittiming_const xcan_bittiming_const = { .brp_inc = 1, }; +#define XCAN_CAP_WATERMARK 0x0001 +struct xcan_devtype_data { + unsigned int caps; +}; + /** * xcan_write_reg_le - Write a value to the device register little endian * @priv: Driver private data structure @@ -239,6 +248,10 @@ static int set_reset_mode(struct net_device *ndev) usleep_range(500, 10000); } + /* reset clears FIFOs */ + priv->tx_head = 0; + priv->tx_tail = 0; + return 0; } @@ -393,6 +406,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf = (struct can_frame *)skb->data; u32 id, dlc, data[2] = {0, 0}; + unsigned long flags; if (can_dropped_invalid_skb(ndev, skb)) return NETDEV_TX_OK; @@ -440,6 +454,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) data[1] = be32_to_cpup((__be32 *)(cf->data + 4)); can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); + + spin_lock_irqsave(&priv->tx_lock, flags); + priv->tx_head++; /* Write the Frame to Xilinx CAN TX FIFO */ @@ -455,10 +472,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) stats->tx_bytes += cf->can_dlc; } + /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */ + if (priv->tx_max > 1) + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK); + /* Check if the TX buffer is full */ if ((priv->tx_head - priv->tx_tail) == priv->tx_max) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return NETDEV_TX_OK; } @@ -832,19 +855,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) { struct xcan_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + unsigned int frames_in_fifo; + int frames_sent = 1; /* TXOK => at least 1 frame was sent */ + unsigned long flags; + int retries = 0; + + /* Synchronize with xmit as we need to know the exact number + * of frames in the FIFO to stay in sync due to the TXFEMP + * handling. + * This also prevents a race between netif_wake_queue() and + * netif_stop_queue(). + */ + spin_lock_irqsave(&priv->tx_lock, flags); + + frames_in_fifo = priv->tx_head - priv->tx_tail; - while ((priv->tx_head - priv->tx_tail > 0) && - (isr & XCAN_IXR_TXOK_MASK)) { + if (WARN_ON_ONCE(frames_in_fifo == 0)) { + /* clear TXOK anyway to avoid getting back here */ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return; + } + + /* Check if 2 frames were sent (TXOK only means that at least 1 + * frame was sent). + */ + if (frames_in_fifo > 1) { + WARN_ON(frames_in_fifo > priv->tx_max); + + /* Synchronize TXOK and isr so that after the loop: + * (1) isr variable is up-to-date at least up to TXOK clear + * time. This avoids us clearing a TXOK of a second frame + * but not noticing that the FIFO is now empty and thus + * marking only a single frame as sent. + * (2) No TXOK is left. Having one could mean leaving a + * stray TXOK as we might process the associated frame + * via TXFEMP handling as we read TXFEMP *after* TXOK + * clear to satisfy (1). + */ + while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) { + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + isr = priv->read_reg(priv, XCAN_ISR_OFFSET); + } + + if (isr & XCAN_IXR_TXFEMP_MASK) { + /* nothing in FIFO anymore */ + frames_sent = frames_in_fifo; + } + } else { + /* single frame in fifo, just clear TXOK */ + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + } + + while (frames_sent--) { can_get_echo_skb(ndev, priv->tx_tail % priv->tx_max); priv->tx_tail++; stats->tx_packets++; - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } + + netif_wake_queue(ndev); + + spin_unlock_irqrestore(&priv->tx_lock, flags); + can_led_event(ndev, CAN_LED_EVENT_TX); xcan_update_error_state_after_rxtx(ndev); - netif_wake_queue(ndev); } /** @@ -1138,6 +1213,18 @@ static const struct dev_pm_ops xcan_dev_pm_ops = { SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL) }; +static const struct xcan_devtype_data xcan_zynq_data = { + .caps = XCAN_CAP_WATERMARK, +}; + +/* Match table for OF platform binding */ +static const struct of_device_id xcan_of_match[] = { + { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data }, + { .compatible = "xlnx,axi-can-1.00.a", }, + { /* end of list */ }, +}; +MODULE_DEVICE_TABLE(of, xcan_of_match); + /** * xcan_probe - Platform registration call * @pdev: Handle to the platform device structure @@ -1152,8 +1239,10 @@ static int xcan_probe(struct platform_device *pdev) struct resource *res; /* IO mem resources */ struct net_device *ndev; struct xcan_priv *priv; + const struct of_device_id *of_id; + int caps = 0; void __iomem *addr; - int ret, rx_max, tx_max; + int ret, rx_max, tx_max, tx_fifo_depth; /* Get the virtual base address for the device */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1163,7 +1252,8 @@ static int xcan_probe(struct platform_device *pdev) goto err; } - ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max); + ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", + &tx_fifo_depth); if (ret < 0) goto err; @@ -1171,6 +1261,30 @@ static int xcan_probe(struct platform_device *pdev) if (ret < 0) goto err; + of_id = of_match_device(xcan_of_match, &pdev->dev); + if (of_id) { + const struct xcan_devtype_data *devtype_data = of_id->data; + + if (devtype_data) + caps = devtype_data->caps; + } + + /* There is no way to directly figure out how many frames have been + * sent when the TXOK interrupt is processed. If watermark programming + * is supported, we can have 2 frames in the FIFO and use TXFEMP + * to determine if 1 or 2 frames have been sent. + * Theoretically we should be able to use TXFWMEMP to determine up + * to 3 frames, but it seems that after putting a second frame in the + * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less + * than 2 frames in FIFO) is set anyway with no TXOK (a frame was + * sent), which is not a sensible state - possibly TXFWMEMP is not + * completely synchronized with the rest of the bits? + */ + if (caps & XCAN_CAP_WATERMARK) + tx_max = min(tx_fifo_depth, 2); + else + tx_max = 1; + /* Create a CAN device instance */ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max); if (!ndev) @@ -1185,6 +1299,7 @@ static int xcan_probe(struct platform_device *pdev) CAN_CTRLMODE_BERR_REPORTING; priv->reg_base = addr; priv->tx_max = tx_max; + spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ ndev->irq = platform_get_irq(pdev, 0); @@ -1249,9 +1364,9 @@ static int xcan_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", + netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n", priv->reg_base, ndev->irq, priv->can.clock.freq, - priv->tx_max); + tx_fifo_depth, priv->tx_max); return 0; @@ -1285,14 +1400,6 @@ static int xcan_remove(struct platform_device *pdev) return 0; } -/* Match table for OF platform binding */ -static const struct of_device_id xcan_of_match[] = { - { .compatible = "xlnx,zynq-can-1.0", }, - { .compatible = "xlnx,axi-can-1.00.a", }, - { /* end of list */ }, -}; -MODULE_DEVICE_TABLE(of, xcan_of_match); - static struct platform_driver xcan_driver = { .probe = xcan_probe, .remove = xcan_remove, -- GitLab From 19c756e01b094e06f025f4a1facd006932455875 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 26 Feb 2018 14:39:59 +0200 Subject: [PATCH 0353/1291] can: xilinx_can: fix incorrect clear of non-processed interrupts commit 2f4f0f338cf453bfcdbcf089e177c16f35f023c8 upstream. xcan_interrupt() clears ERROR|RXOFLV|BSOFF|ARBLST interrupts if any of them is asserted. This does not take into account that some of them could have been asserted between interrupt status read and interrupt clear, therefore clearing them without handling them. Fix the code to only clear those interrupts that it knows are asserted and therefore going to be processed in xcan_err_interrupt(). Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Michal Simek Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 63f4d3eddb0f..62a58bf23dc8 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -938,6 +938,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) struct net_device *ndev = (struct net_device *)dev_id; struct xcan_priv *priv = netdev_priv(ndev); u32 isr, ier; + u32 isr_errors; /* Get the interrupt status from Xilinx CAN */ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); @@ -956,11 +957,10 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) xcan_tx_interrupt(ndev, isr); /* Check for the type of error interrupt and Processing it */ - if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | - XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) { - priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK | - XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK | - XCAN_IXR_ARBLST_MASK)); + isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | + XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK); + if (isr_errors) { + priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors); xcan_err_interrupt(ndev, isr); } -- GitLab From 60454a9715df60af6f1538826a59b8e0b7d0156f Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 26 Feb 2018 14:27:13 +0200 Subject: [PATCH 0354/1291] can: xilinx_can: fix RX overflow interrupt not being enabled commit 83997997252f5d3fc7f04abc24a89600c2b504ab upstream. RX overflow interrupt (RXOFLW) is disabled even though xcan_interrupt() processes it. This means that an RX overflow interrupt will only be processed when another interrupt gets asserted (e.g. for RX/TX). Fix that by enabling the RXOFLW interrupt. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Michal Simek Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 62a58bf23dc8..5a24039733ef 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -104,7 +104,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK) + XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN___SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ -- GitLab From a55d3d73d45787b091b1fadf1b1c0c23613860c1 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 21 Jun 2018 15:23:31 +0200 Subject: [PATCH 0355/1291] can: peak_canfd: fix firmware < v3.3.0: limit allocation to 32-bit DMA addr only commit 5d4c94ed9f564224d7b37dbee13f7c5d4a8a01ac upstream. The DMA logic in firmwares < v3.3.0 embedded in the PCAN-PCIe FD cards family is not capable of handling a mix of 32-bit and 64-bit logical addresses. If the board is equipped with 2 or 4 CAN ports, then such a situation might lead to a PCIe Bus Error "Malformed TLP" packet as well as "irq xx: nobody cared" issue. This patch adds a workaround that requests only 32-bit DMA addresses when these might be allocated outside of the 4 GB area. This issue has been fixed in firmware v3.3.0 and next. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/peak_canfd/peak_pciefd_main.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index 3c51a884db87..fa689854f16b 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -58,6 +58,10 @@ MODULE_LICENSE("GPL v2"); #define PCIEFD_REG_SYS_VER1 0x0040 /* version reg #1 */ #define PCIEFD_REG_SYS_VER2 0x0044 /* version reg #2 */ +#define PCIEFD_FW_VERSION(x, y, z) (((u32)(x) << 24) | \ + ((u32)(y) << 16) | \ + ((u32)(z) << 8)) + /* System Control Registers Bits */ #define PCIEFD_SYS_CTL_TS_RST 0x00000001 /* timestamp clock */ #define PCIEFD_SYS_CTL_CLK_EN 0x00000002 /* system clock */ @@ -783,6 +787,21 @@ static int peak_pciefd_probe(struct pci_dev *pdev, "%ux CAN-FD PCAN-PCIe FPGA v%u.%u.%u:\n", can_count, hw_ver_major, hw_ver_minor, hw_ver_sub); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* FW < v3.3.0 DMA logic doesn't handle correctly the mix of 32-bit and + * 64-bit logical addresses: this workaround forces usage of 32-bit + * DMA addresses only when such a fw is detected. + */ + if (PCIEFD_FW_VERSION(hw_ver_major, hw_ver_minor, hw_ver_sub) < + PCIEFD_FW_VERSION(3, 3, 0)) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + dev_warn(&pdev->dev, + "warning: can't set DMA mask %llxh (err %d)\n", + DMA_BIT_MASK(32), err); + } +#endif + /* stop system clock */ pciefd_sys_writereg(pciefd, PCIEFD_SYS_CTL_CLK_EN, PCIEFD_REG_SYS_CTL_CLR); -- GitLab From 08382d3a1be25a41d9d82b8bba9a26c9b2b22cf3 Mon Sep 17 00:00:00 2001 From: Roman Fietze Date: Wed, 11 Jul 2018 15:36:14 +0200 Subject: [PATCH 0356/1291] can: m_can.c: fix setup of CCCR register: clear CCCR NISO bit before checking can.ctrlmode commit 393753b217f05474e714aea36c37501546ed1202 upstream. Inside m_can_chip_config(), when setting up the new value of the CCCR, the CCCR_NISO bit is not cleared like the others, CCCR_TEST, CCCR_MON, CCCR_BRSE and CCCR_FDOE, before checking the can.ctrlmode bits for CAN_CTRLMODE_FD_NON_ISO. This way once the controller was configured for CAN_CTRLMODE_FD_NON_ISO, this mode could never be cleared again. This fix is only relevant for controllers with version 3.1.x or 3.2.x. Older versions do not support NISO. Signed-off-by: Roman Fietze Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5d4e61741476..ca3fa82316c2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1073,7 +1073,8 @@ static void m_can_chip_config(struct net_device *dev) } else { /* Version 3.1.x or 3.2.x */ - cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE); + cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE | + CCCR_NISO); /* Only 3.2.x has NISO Bit implemented */ if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) -- GitLab From e94f784fddd58418bf5d97a23cf331d04a1bf5b1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Jul 2018 10:13:22 +0200 Subject: [PATCH 0357/1291] turn off -Wattribute-alias Starting with gcc-8.1, we get a warning about all system call definitions, which use an alias between functions with incompatible prototypes, e.g.: In file included from ../mm/process_vm_access.c:19: ../include/linux/syscalls.h:211:18: warning: 'sys_process_vm_readv' alias between functions of incompatible types 'long int(pid_t, const struct iovec *, long unsigned int, const struct iovec *, long unsigned int, long unsigned int)' {aka 'long int(int, const struct iovec *, long unsigned int, const struct iovec *, long unsigned int, long unsigned int)'} and 'long int(long int, long int, long int, long int, long int, long int)' [-Wattribute-alias] asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ ^~~ ../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^~~~~~~~~~~~~~~~~ ../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx' #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~ ../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6' SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec, ^~~~~~~~~~~~~~~ ../include/linux/syscalls.h:215:18: note: aliased declaration here asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ ^~~ ../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^~~~~~~~~~~~~~~~~ ../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx' #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~ ../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6' SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec, This is really noisy and does not indicate a real problem. In the latest mainline kernel, this was addressed by commit bee20031772a ("disable -Wattribute-alias warning for SYSCALL_DEFINEx()"), which seems too invasive to backport. This takes a much simpler approach and just disables the warning across the kernel. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index ffc9b4e3867e..7c886c9ed9c8 100644 --- a/Makefile +++ b/Makefile @@ -642,6 +642,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) +KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) -- GitLab From 53208e12faa5b8c6eac4eb1d23d6e3fae450fc5a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 28 Jul 2018 07:55:45 +0200 Subject: [PATCH 0358/1291] Linux 4.14.59 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7c886c9ed9c8..81b0e99dce80 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 58 +SUBLEVEL = 59 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 2d5fc7ffa84bfa0d778c30b96072295ad36a1030 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 20 Apr 2018 14:55:31 -0700 Subject: [PATCH 0359/1291] fork: unconditionally clear stack on fork commit e01e80634ecdde1dd113ac43b3adad21b47f3957 upstream. One of the classes of kernel stack content leaks[1] is exposing the contents of prior heap or stack contents when a new process stack is allocated. Normally, those stacks are not zeroed, and the old contents remain in place. In the face of stack content exposure flaws, those contents can leak to userspace. Fixing this will make the kernel no longer vulnerable to these flaws, as the stack will be wiped each time a stack is assigned to a new process. There's not a meaningful change in runtime performance; it almost looks like it provides a benefit. Performing back-to-back kernel builds before: Run times: 157.86 157.09 158.90 160.94 160.80 Mean: 159.12 Std Dev: 1.54 and after: Run times: 159.31 157.34 156.71 158.15 160.81 Mean: 158.46 Std Dev: 1.46 Instead of making this a build or runtime config, Andy Lutomirski recommended this just be enabled by default. [1] A noisy search for many kinds of stack content leaks can be seen here: https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak I did some more with perf and cycle counts on running 100,000 execs of /bin/true. before: Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841 Mean: 221015379122.60 Std Dev: 4662486552.47 after: Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348 Mean: 217745009865.40 Std Dev: 5935559279.99 It continues to look like it's faster, though the deviation is rather wide, but I'm not sure what I could do that would be less noisy. I'm open to ideas! Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast Signed-off-by: Kees Cook Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Andy Lutomirski Cc: Laura Abbott Cc: Rasmus Villemoes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/thread_info.h | 6 +----- kernel/fork.c | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 34f053a150a9..cf2862bd134a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -43,11 +43,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) -#else -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) -#endif +#define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) /* * flag set/clear/test wrappers diff --git a/kernel/fork.c b/kernel/fork.c index 98c91bd341b4..91907a3701ce 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -215,10 +215,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; -#ifdef CONFIG_DEBUG_KMEMLEAK /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); -#endif + tsk->stack_vm_area = s; return s->addr; } -- GitLab From c0b86d269bf097fd100e275574ae50f1aec248ad Mon Sep 17 00:00:00 2001 From: Lixin Wang Date: Mon, 27 Nov 2017 15:06:55 +0800 Subject: [PATCH 0360/1291] i2c: core: decrease reference count of device node in i2c_unregister_device commit e0638fa400eaccf9fa8060f67140264c4e276552 upstream. Reference count of device node was increased in of_i2c_register_device, but without decreasing it in i2c_unregister_device. Then the added device node will never be released. Fix this by adding the of_node_put. Signed-off-by: Lixin Wang Tested-by: Wolfram Sang Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 56e46581b84b..6f2fe63e8f5a 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -808,8 +808,11 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { - if (client->dev.of_node) + if (client->dev.of_node) { of_node_clear_flag(client->dev.of_node, OF_POPULATED); + of_node_put(client->dev.of_node); + } + if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); device_unregister(&client->dev); -- GitLab From 3af618717e710a4b4d635eaef87645bc0e99c71a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Mar 2018 14:00:28 -0800 Subject: [PATCH 0361/1291] RDMA/core: Avoid that ib_drain_qp() triggers an out-of-bounds stack access commit a1ae7d0345edd593d6725d3218434d903a0af95d upstream. This patch fixes the following KASAN complaint: ================================================================== BUG: KASAN: stack-out-of-bounds in rxe_post_send+0x77d/0x9b0 [rdma_rxe] Read of size 8 at addr ffff880061aef860 by task 01/1080 CPU: 2 PID: 1080 Comm: 01 Not tainted 4.16.0-rc3-dbg+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x85/0xc7 print_address_description+0x65/0x270 kasan_report+0x231/0x350 rxe_post_send+0x77d/0x9b0 [rdma_rxe] __ib_drain_sq+0x1ad/0x250 [ib_core] ib_drain_qp+0x9/0x30 [ib_core] srp_destroy_qp+0x51/0x70 [ib_srp] srp_free_ch_ib+0xfc/0x380 [ib_srp] srp_create_target+0x1071/0x19e0 [ib_srp] kernfs_fop_write+0x180/0x210 __vfs_write+0xb1/0x2e0 vfs_write+0xf6/0x250 SyS_write+0x99/0x110 do_syscall_64+0xee/0x2b0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 The buggy address belongs to the page: page:ffffea000186bbc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x4000000000000000() raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff raw: 0000000000000000 ffffea000186bbe0 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880061aef700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880061aef780: 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 >ffff880061aef800: f2 f2 f2 f2 f2 f2 f2 00 00 00 00 00 f2 f2 f2 f2 ^ ffff880061aef880: f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 f2 f2 ffff880061aef900: f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Fixes: 765d67748bcf ("IB: new common API for draining queues") Signed-off-by: Bart Van Assche Cc: Steve Wise Cc: Sagi Grimberg Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9032f77cc38d..195f0bc3f4ee 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2115,10 +2115,15 @@ static void __ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; - struct ib_send_wr swr = {}, *bad_swr; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .opcode = IB_WR_RDMA_WRITE, + .wr_cqe = &sdrain.cqe, + }, + }; int ret; - swr.wr_cqe = &sdrain.cqe; sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); @@ -2128,7 +2133,7 @@ static void __ib_drain_sq(struct ib_qp *qp) return; } - ret = ib_post_send(qp, &swr, &bad_swr); + ret = ib_post_send(qp, &swr.wr, &bad_swr); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; -- GitLab From d02c9c8bfef147c790985db5a348a91ab0deec66 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 13 Mar 2018 14:51:57 -0700 Subject: [PATCH 0362/1291] drivers/infiniband/core/verbs.c: fix build with gcc-4.4.4 commit 6ee687735e745eafae9e6b93d1ea70bc52e7ad07 upstream. gcc-4.4.4 has issues with initialization of anonymous unions. drivers/infiniband/core/verbs.c: In function '__ib_drain_sq': drivers/infiniband/core/verbs.c:2204: error: unknown field 'wr_cqe' specified in initializer drivers/infiniband/core/verbs.c:2204: warning: initialization makes integer from pointer without a cast Work around this. Fixes: a1ae7d0345edd5 ("RDMA/core: Avoid that ib_drain_qp() triggers an out-of-bounds stack access") Cc: Bart Van Assche Cc: Steve Wise Cc: Sagi Grimberg Cc: Jason Gunthorpe Cc: Signed-off-by: Andrew Morton Signed-off-by: Doug Ledford Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 195f0bc3f4ee..feb80dbb5948 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2118,8 +2118,9 @@ static void __ib_drain_sq(struct ib_qp *qp) struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, .opcode = IB_WR_RDMA_WRITE, - .wr_cqe = &sdrain.cqe, }, }; int ret; -- GitLab From 1e8bb2e9c9df0d0e491e5022ce80a2b96b757ac3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Mar 2018 14:00:30 -0800 Subject: [PATCH 0363/1291] IB/srpt: Fix an out-of-bounds stack access in srpt_zerolength_write() commit 2a78cb4db487372152bed2055c038f9634d595e8 upstream. Avoid triggering an out-of-bounds stack access by changing the type of 'wr' from ib_send_wr into ib_rdma_wr. This patch fixes the following KASAN bug report: BUG: KASAN: stack-out-of-bounds in rxe_post_send+0x7a9/0x9a0 [rdma_rxe] Read of size 8 at addr ffff880068197a48 by task kworker/2:1/44 Workqueue: ib_cm cm_work_handler [ib_cm] Call Trace: dump_stack+0x8e/0xcd print_address_description+0x6f/0x280 kasan_report+0x25a/0x380 __asan_load8+0x54/0x90 rxe_post_send+0x7a9/0x9a0 [rdma_rxe] srpt_zerolength_write+0xf0/0x180 [ib_srpt] srpt_cm_rtu_recv+0x68/0x110 [ib_srpt] srpt_rdma_cm_handler+0xbb/0x15b [ib_srpt] cma_ib_handler+0x1aa/0x4a0 [rdma_cm] cm_process_work+0x30/0x100 [ib_cm] cm_work_handler+0xa86/0x351b [ib_cm] process_one_work+0x475/0x9f0 worker_thread+0x69/0x690 kthread+0x1ad/0x1d0 ret_from_fork+0x3a/0x50 Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ee578fa713c2..5a5e1343b386 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -787,13 +787,16 @@ static int srpt_post_recv(struct srpt_device *sdev, */ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) { - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr *bad_wr; + struct ib_rdma_wr wr = { + .wr = { + .opcode = IB_WR_RDMA_WRITE, + .wr_cqe = &ch->zw_cqe, + .send_flags = IB_SEND_SIGNALED, + } + }; - memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_RDMA_WRITE; - wr.wr_cqe = &ch->zw_cqe; - wr.send_flags = IB_SEND_SIGNALED; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, &bad_wr); } static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) -- GitLab From e581f7c590cca70b7338f43b73f4cfa081077c00 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 13 Mar 2018 15:06:45 -0700 Subject: [PATCH 0364/1291] drivers/infiniband/ulp/srpt/ib_srpt.c: fix build with gcc-4.4.4 commit 06892cc190550807d332c95a0114c7e175584012 upstream. gcc-4.4.4 has issues with initialization of anonymous unions: drivers/infiniband/ulp/srpt/ib_srpt.c: In function 'srpt_zerolength_write': drivers/infiniband/ulp/srpt/ib_srpt.c:854: error: unknown field 'wr_cqe' specified in initializer drivers/infiniband/ulp/srpt/ib_srpt.c:854: warning: initialization makes integer from pointer without a cast Work aound this. Fixes: 2a78cb4db487 ("IB/srpt: Fix an out-of-bounds stack access in srpt_zerolength_write()") Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Signed-off-by: Andrew Morton Signed-off-by: Doug Ledford Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5a5e1343b386..97c2225829ea 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -790,8 +790,9 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) struct ib_send_wr *bad_wr; struct ib_rdma_wr wr = { .wr = { + .next = NULL, + { .wr_cqe = &ch->zw_cqe, }, .opcode = IB_WR_RDMA_WRITE, - .wr_cqe = &ch->zw_cqe, .send_flags = IB_SEND_SIGNALED, } }; -- GitLab From c09032b71fc6fa7d8a010b3a58ee4ca2ae9e1a09 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 16 May 2018 10:42:39 +0200 Subject: [PATCH 0365/1291] spi: spi-s3c64xx: Fix system resume support commit e935dba111621bd6a0c5d48e6511a4d9885103b4 upstream. Since Linux v4.10 release (commit 1d9174fbc55e "PM / Runtime: Defer resuming of the device in pm_runtime_force_resume()"), pm_runtime_force_resume() function doesn't runtime resume device if it was not runtime active before system suspend. Thus, driver should not do any register access after pm_runtime_force_resume() without checking the runtime status of the device. To fix this issue, simply move s3c64xx_spi_hwinit() call to s3c64xx_spi_runtime_resume() to ensure that hardware is always properly initialized. This fixes Synchronous external abort issue on system suspend/resume cycle on newer Exynos SoCs. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-s3c64xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index b392cca8fa4f..1a6ec226d6e4 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1273,8 +1273,6 @@ static int s3c64xx_spi_resume(struct device *dev) if (ret < 0) return ret; - s3c64xx_spi_hwinit(sdd, sdd->port_id); - return spi_master_resume(master); } #endif /* CONFIG_PM_SLEEP */ @@ -1312,6 +1310,8 @@ static int s3c64xx_spi_runtime_resume(struct device *dev) if (ret != 0) goto err_disable_src_clk; + s3c64xx_spi_hwinit(sdd, sdd->port_id); + return 0; err_disable_src_clk: -- GitLab From b4667635de2e60b1ffb059f03d3d2977258849f0 Mon Sep 17 00:00:00 2001 From: Donald Shanty III Date: Wed, 4 Jul 2018 15:50:47 +0000 Subject: [PATCH 0366/1291] Input: elan_i2c - add ACPI ID for lenovo ideapad 330 commit 938f45008d8bc391593c97508bc798cc95a52b9b upstream. This allows Elan driver to bind to the touchpad found in Lenovo Ideapad 330 series laptops. Signed-off-by: Donald Shanty III Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 7b5fa501bbcf..471c3e1a533a 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1262,6 +1262,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061D", 0 }, { "ELAN1000", 0 }, { } }; -- GitLab From e0e385e2126e0b73af8f54301b818499d2b3e6db Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 18 Jul 2018 17:24:35 +0000 Subject: [PATCH 0367/1291] Input: i8042 - add Lenovo LaVie Z to the i8042 reset list commit 384cf4285b34e08917e3e66603382f2b0c4f6e1b upstream. The Lenovo LaVie Z laptop requires i8042 to be reset in order to consistently detect its Elantech touchpad. The nomux and kbdreset quirks are not sufficient. It's possible the other LaVie Z models from NEC require this as well. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index b353d494ad40..136f6e7bf797 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -527,6 +527,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), }, }, + { + /* Lenovo LaVie Z */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"), + }, + }, { } }; -- GitLab From ca6427facd967cbb7067422c1f3ab085874bb037 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Mon, 16 Jul 2018 12:10:03 +0000 Subject: [PATCH 0368/1291] Input: elan_i2c - add another ACPI ID for Lenovo Ideapad 330-15AST commit 6f88a6439da5d94de334a341503bc2c7f4a7ea7f upstream. Add ELAN0622 to ACPI mapping table to support Elan touchpad found in Ideapad 330-15AST. Signed-off-by: KT Liao Reported-by: Anant Shende Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 471c3e1a533a..696e540304fd 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1263,6 +1263,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0612", 0 }, { "ELAN0618", 0 }, { "ELAN061D", 0 }, + { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } }; -- GitLab From 8eead4f5dea9fdf01ac254171e3cc141181b825b Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 26 Jul 2018 16:37:45 -0700 Subject: [PATCH 0369/1291] kvm, mm: account shadow page tables to kmemcg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d97e5e6160c0e0a23963ec198c7cb1c69e6bf9e8 upstream. The size of kvm's shadow page tables corresponds to the size of the guest virtual machines on the system. Large VMs can spend a significant amount of memory as shadow page tables which can not be left as system memory overhead. So, account shadow page tables to the kmemcg. [shakeelb@google.com: replace (GFP_KERNEL|__GFP_ACCOUNT) with GFP_KERNEL_ACCOUNT] Link: http://lkml.kernel.org/r/20180629140224.205849-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20180627181349.149778-1-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Paolo Bonzini Cc: Greg Thelen Cc: Radim Krčmář Cc: Peter Feiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 43bbece92632..2ef2f1fe875b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -890,7 +890,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - page = (void *)__get_free_page(GFP_KERNEL); + page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); if (!page) return -ENOMEM; cache->objects[cache->nobjs++] = page; -- GitLab From a2f85c02810f91af6f2b7dd5de6a0ffb4c835b67 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Jul 2018 16:37:08 -0700 Subject: [PATCH 0370/1291] delayacct: fix crash in delayacct_blkio_end() after delayacct init failure commit b512719f771a82180211c9a315b8a7f628832b3d upstream. While forking, if delayacct init fails due to memory shortage, it continues expecting all delayacct users to check task->delays pointer against NULL before dereferencing it, which all of them used to do. Commit c96f5471ce7d ("delayacct: Account blkio completion on the correct task"), while updating delayacct_blkio_end() to take the target task instead of always using %current, made the function test NULL on %current->delays and then continue to operated on @p->delays. If %current succeeded init while @p didn't, it leads to the following crash. BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: __delayacct_blkio_end+0xc/0x40 PGD 8000001fd07e1067 P4D 8000001fd07e1067 PUD 1fcffbb067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 4 PID: 25774 Comm: QIOThread0 Not tainted 4.16.0-9_fbk1_rc2_1180_g6b593215b4d7 #9 RIP: 0010:__delayacct_blkio_end+0xc/0x40 Call Trace: try_to_wake_up+0x2c0/0x600 autoremove_wake_function+0xe/0x30 __wake_up_common+0x74/0x120 wake_up_page_bit+0x9c/0xe0 mpage_end_io+0x27/0x70 blk_update_request+0x78/0x2c0 scsi_end_request+0x2c/0x1e0 scsi_io_completion+0x20b/0x5f0 blk_mq_complete_request+0xa2/0x100 ata_scsi_qc_complete+0x79/0x400 ata_qc_complete_multiple+0x86/0xd0 ahci_handle_port_interrupt+0xc9/0x5c0 ahci_handle_port_intr+0x54/0xb0 ahci_single_level_irq_intr+0x3b/0x60 __handle_irq_event_percpu+0x43/0x190 handle_irq_event_percpu+0x20/0x50 handle_irq_event+0x2a/0x50 handle_edge_irq+0x80/0x1c0 handle_irq+0xaf/0x120 do_IRQ+0x41/0xc0 common_interrupt+0xf/0xf Fix it by updating delayacct_blkio_end() check @p->delays instead. Link: http://lkml.kernel.org/r/20180724175542.GP1934745@devbig577.frc2.facebook.com Fixes: c96f5471ce7d ("delayacct: Account blkio completion on the correct task") Signed-off-by: Tejun Heo Reported-by: Dave Jones Debugged-by: Dave Jones Reviewed-by: Andrew Morton Cc: Josh Snyder Cc: [4.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/delayacct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 5e335b6203f4..41ee6dea7531 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -124,7 +124,7 @@ static inline void delayacct_blkio_start(void) static inline void delayacct_blkio_end(struct task_struct *p) { - if (current->delays) + if (p->delays) __delayacct_blkio_end(p); delayacct_clear_flag(DELAYACCT_PF_BLKIO); } -- GitLab From 9158a7debe5351688515de22fd18a5a8b006f458 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 24 Jul 2018 19:13:31 -0400 Subject: [PATCH 0371/1291] tracing: Fix double free of event_trigger_data commit 1863c387259b629e4ebfb255495f67cd06aa229b upstream. Running the following: # cd /sys/kernel/debug/tracing # echo 500000 > buffer_size_kb [ Or some other number that takes up most of memory ] # echo snapshot > events/sched/sched_switch/trigger Triggers the following bug: ------------[ cut here ]------------ kernel BUG at mm/slub.c:296! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI CPU: 6 PID: 6878 Comm: bash Not tainted 4.18.0-rc6-test+ #1066 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:kfree+0x16c/0x180 Code: 05 41 0f b6 72 51 5b 5d 41 5c 4c 89 d7 e9 ac b3 f8 ff 48 89 d9 48 89 da 41 b8 01 00 00 00 5b 5d 41 5c 4c 89 d6 e9 f4 f3 ff ff <0f> 0b 0f 0b 48 8b 3d d9 d8 f9 00 e9 c1 fe ff ff 0f 1f 40 00 0f 1f RSP: 0018:ffffb654436d3d88 EFLAGS: 00010246 RAX: ffff91a9d50f3d80 RBX: ffff91a9d50f3d80 RCX: ffff91a9d50f3d80 RDX: 00000000000006a4 RSI: ffff91a9de5a60e0 RDI: ffff91a9d9803500 RBP: ffffffff8d267c80 R08: 00000000000260e0 R09: ffffffff8c1a56be R10: fffff0d404543cc0 R11: 0000000000000389 R12: ffffffff8c1a56be R13: ffff91a9d9930e18 R14: ffff91a98c0c2890 R15: ffffffff8d267d00 FS: 00007f363ea64700(0000) GS:ffff91a9de580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c1cacc8e10 CR3: 00000000d9b46003 CR4: 00000000001606e0 Call Trace: event_trigger_callback+0xee/0x1d0 event_trigger_write+0xfc/0x1a0 __vfs_write+0x33/0x190 ? handle_mm_fault+0x115/0x230 ? _cond_resched+0x16/0x40 vfs_write+0xb0/0x190 ksys_write+0x52/0xc0 do_syscall_64+0x5a/0x160 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f363e16ab50 Code: 73 01 c3 48 8b 0d 38 83 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 79 db 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 1e e3 01 00 48 89 04 24 RSP: 002b:00007fff9a4c6378 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f363e16ab50 RDX: 0000000000000009 RSI: 000055c1cacc8e10 RDI: 0000000000000001 RBP: 000055c1cacc8e10 R08: 00007f363e435740 R09: 00007f363ea64700 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000009 R13: 0000000000000001 R14: 00007f363e4345e0 R15: 00007f363e4303c0 Modules linked in: ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device i915 snd_pcm snd_timer i2c_i801 snd soundcore i2c_algo_bit drm_kms_helper 86_pkg_temp_thermal video kvm_intel kvm irqbypass wmi e1000e ---[ end trace d301afa879ddfa25 ]--- The cause is because the register_snapshot_trigger() call failed to allocate the snapshot buffer, and then called unregister_trigger() which freed the data that was passed to it. Then on return to the function that called register_snapshot_trigger(), as it sees it failed to register, it frees the trigger_data again and causes a double free. By calling event_trigger_init() on the trigger_data (which only ups the reference counter for it), and then event_trigger_free() afterward, the trigger_data would not get freed by the registering trigger function as it would only up and lower the ref count for it. If the register trigger function fails, then the event_trigger_free() called after it will free the trigger data normally. Link: http://lkml.kernel.org/r/20180724191331.738eb819@gandalf.local.home Cc: stable@vger.kerne.org Fixes: 93e31ffbf417 ("tracing: Add 'snapshot' event trigger command") Reported-by: Masami Hiramatsu Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index b413fab7d75b..d656e32dc853 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -680,6 +680,8 @@ event_trigger_callback(struct event_command *cmd_ops, goto out_free; out_reg: + /* Up the trigger_data count to make sure reg doesn't free it on failure */ + event_trigger_init(trigger_ops, trigger_data); ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); /* * The above returns on success the # of functions enabled, @@ -687,11 +689,13 @@ event_trigger_callback(struct event_command *cmd_ops, * Consider no functions a failure too. */ if (!ret) { + cmd_ops->unreg(glob, trigger_ops, trigger_data, file); ret = -ENOENT; - goto out_free; - } else if (ret < 0) - goto out_free; - ret = 0; + } else if (ret > 0) + ret = 0; + + /* Down the counter of trigger_data or free it if not used anymore */ + event_trigger_free(trigger_ops, trigger_data); out: return ret; -- GitLab From 10419b0c16d41e7fa3848e83e6bf7521aa9407ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 25 Jul 2018 16:02:06 -0400 Subject: [PATCH 0372/1291] tracing: Fix possible double free in event_enable_trigger_func() commit 15cc78644d0075e76d59476a4467e7143860f660 upstream. There was a case that triggered a double free in event_trigger_callback() due to the called reg() function freeing the trigger_data and then it getting freed again by the error return by the caller. The solution there was to up the trigger_data ref count. Code inspection found that event_enable_trigger_func() has the same issue, but is not as easy to trigger (requires harder to trigger failures). It needs to be solved slightly different as it needs more to clean up when the reg() function fails. Link: http://lkml.kernel.org/r/20180725124008.7008e586@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 7862ad1846e99 ("tracing: Add 'enable_event' and 'disable_event' event trigger commands") Reivewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d656e32dc853..43254c5e7e16 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1396,6 +1396,9 @@ int event_enable_trigger_func(struct event_command *cmd_ops, goto out; } + /* Up the trigger_data count to make sure nothing frees it on failure */ + event_trigger_init(trigger_ops, trigger_data); + if (trigger) { number = strsep(&trigger, ":"); @@ -1446,6 +1449,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, goto out_disable; /* Just return zero, not the number of enabled functions */ ret = 0; + event_trigger_free(trigger_ops, trigger_data); out: return ret; @@ -1456,7 +1460,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); - kfree(trigger_data); + event_trigger_free(trigger_ops, trigger_data); kfree(enable_data); goto out; } -- GitLab From f957456878eb3b5fd8c41412686a76d2cce6c504 Mon Sep 17 00:00:00 2001 From: Snild Dolkow Date: Thu, 26 Jul 2018 09:15:39 +0200 Subject: [PATCH 0373/1291] kthread, tracing: Don't expose half-written comm when creating kthreads commit 3e536e222f2930534c252c1cc7ae799c725c5ff9 upstream. There is a window for racing when printing directly to task->comm, allowing other threads to see a non-terminated string. The vsnprintf function fills the buffer, counts the truncated chars, then finally writes the \0 at the end. creator other vsnprintf: fill (not terminated) count the rest trace_sched_waking(p): ... memcpy(comm, p->comm, TASK_COMM_LEN) write \0 The consequences depend on how 'other' uses the string. In our case, it was copied into the tracing system's saved cmdlines, a buffer of adjacent TASK_COMM_LEN-byte buffers (note the 'n' where 0 should be): crash-arm64> x/1024s savedcmd->saved_cmdlines | grep 'evenk' 0xffffffd5b3818640: "irq/497-pwr_evenkworker/u16:12" ...and a strcpy out of there would cause stack corruption: [224761.522292] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffff9bf9783c78 crash-arm64> kbt | grep 'comm\|trace_print_context' #6 0xffffff9bf9783c78 in trace_print_context+0x18c(+396) comm (char [16]) = "irq/497-pwr_even" crash-arm64> rd 0xffffffd4d0e17d14 8 ffffffd4d0e17d14: 2f71726900000000 5f7277702d373934 ....irq/497-pwr_ ffffffd4d0e17d24: 726f776b6e657665 3a3631752f72656b evenkworker/u16: ffffffd4d0e17d34: f9780248ff003231 cede60e0ffffff9b 12..H.x......`.. ffffffd4d0e17d44: cede60c8ffffffd4 00000fffffffffd4 .....`.......... The workaround in e09e28671 (use strlcpy in __trace_find_cmdline) was likely needed because of this same bug. Solved by vsnprintf:ing to a local buffer, then using set_task_comm(). This way, there won't be a window where comm is not terminated. Link: http://lkml.kernel.org/r/20180726071539.188015-1-snild@sony.com Cc: stable@vger.kernel.org Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure") Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Snild Dolkow Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/kthread.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 1ef8f3a5b072..4e6d85b63201 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -311,8 +311,14 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), task = create->result; if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; + char name[TASK_COMM_LEN]; - vsnprintf(task->comm, sizeof(task->comm), namefmt, args); + /* + * task is already visible to other tasks, so updating + * COMM must be protected. + */ + vsnprintf(name, sizeof(name), namefmt, args); + set_task_comm(task, name); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. -- GitLab From 86428ec165ee9c25618794b442827ab78b3da087 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 25 Jul 2018 16:20:38 +0200 Subject: [PATCH 0374/1291] tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure commit 57ea2a34adf40f3a6e88409aafcf803b8945619a upstream. If enable_trace_kprobe fails to enable the probe in enable_k(ret)probe it returns an error, but does not unset the tp flags it set previously. This results in a probe being considered enabled and failures like being unable to remove the probe through kprobe_events file since probes_open() expects every probe to be disabled. Link: http://lkml.kernel.org/r/20180725102826.8300-1-asavkov@redhat.com Link: http://lkml.kernel.org/r/20180725142038.4765-1-asavkov@redhat.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 41a7dd420c57 ("tracing/kprobes: Support ftrace_event_file base multibuffer") Acked-by: Masami Hiramatsu Reviewed-by: Josh Poimboeuf Signed-off-by: Artem Savkov Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f8d3bd974bcc..474ada34ef26 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -376,11 +376,10 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { + struct event_file_link *link; int ret = 0; if (file) { - struct event_file_link *link; - link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { ret = -ENOMEM; @@ -400,6 +399,16 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) else ret = enable_kprobe(&tk->rp.kp); } + + if (ret) { + if (file) { + list_del_rcu(&link->list); + kfree(link); + tk->tp.flags &= ~TP_FLAG_TRACE; + } else { + tk->tp.flags &= ~TP_FLAG_PROFILE; + } + } out: return ret; } -- GitLab From 4681e8820f99aea022801ed94a279d61746b9523 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 25 Jul 2018 22:28:56 -0400 Subject: [PATCH 0375/1291] tracing: Quiet gcc warning about maybe unused link variable commit 2519c1bbe38d7acacc9aacba303ca6f97482ed53 upstream. Commit 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure") added an if statement that depends on another if statement that gcc doesn't see will initialize the "link" variable and gives the warning: "warning: 'link' may be used uninitialized in this function" It is really a false positive, but to quiet the warning, and also to make sure that it never actually is used uninitialized, initialize the "link" variable to NULL and add an if (!WARN_ON_ONCE(!link)) where the compiler thinks it could be used uninitialized. Cc: stable@vger.kernel.org Fixes: 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure") Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 474ada34ef26..ea20274a105a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -376,7 +376,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { - struct event_file_link *link; + struct event_file_link *link = NULL; int ret = 0; if (file) { @@ -402,7 +402,9 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) if (ret) { if (file) { - list_del_rcu(&link->list); + /* Notice the if is true on not WARN() */ + if (!WARN_ON_ONCE(!link)) + list_del_rcu(&link->list); kfree(link); tk->tp.flags &= ~TP_FLAG_TRACE; } else { -- GitLab From c1550e0141351e38af1dc781b18e7d2c5ebd42b4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 23 Jul 2018 10:18:23 -0400 Subject: [PATCH 0376/1291] arm64: fix vmemmap BUILD_BUG_ON() triggering on !vmemmap setups commit 7b0eb6b41a08fa1fa0d04b1c53becd62b5fbfaee upstream. Arnd reports the following arm64 randconfig build error with the PSI patches that add another page flag: /git/arm-soc/arch/arm64/mm/init.c: In function 'mem_init': /git/arm-soc/include/linux/compiler.h:357:38: error: call to '__compiletime_assert_618' declared with attribute error: BUILD_BUG_ON failed: sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT) The additional page flag causes other information stored in page->flags to get bumped into their own struct page member: #if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif #if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif which in turn causes the struct page size to exceed the size set in STRUCT_PAGE_MAX_SHIFT. This value is an an estimate used to size the VMEMMAP page array according to address space and struct page size. However, the check is performed - and triggers here - on a !VMEMMAP config, which consumes an additional 22 page bits for the sparse section id. When VMEMMAP is enabled, those bits are returned, cpupid doesn't need its own member, and the page passes the VMEMMAP check. Restrict that check to the situation it was meant to check: that we are sizing the VMEMMAP page array correctly. Says Arnd: Further experiments show that the build error already existed before, but was only triggered with larger values of CONFIG_NR_CPU and/or CONFIG_NODES_SHIFT that might be used in actual configurations but not in randconfig builds. With longer CPU and node masks, I could recreate the problem with kernels as old as linux-4.7 when arm64 NUMA support got added. Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Cc: stable@vger.kernel.org Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Fixes: 3e1907d5bf5a ("arm64: mm: move vmemmap region right below the linear region") Signed-off-by: Johannes Weiner Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 00e7b900ca41..1190d90e01e6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -651,11 +651,13 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Make sure we chose the upper bound of sizeof(struct page) - * correctly. + * correctly when sizing the VMEMMAP array. */ BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT)); +#endif if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; -- GitLab From 73990abb1a04a526c011780e1c06b9c80d613e1a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 15 Jun 2018 16:23:38 +0300 Subject: [PATCH 0377/1291] mlxsw: spectrum_switchdev: Fix port_vlan refcounting [ Upstream commit 9e25826ffc942e985b8595b2f1cf2065d3880514 ] Switchdev notifications for addition of SWITCHDEV_OBJ_ID_PORT_VLAN are distributed not only on clean addition, but also when flags on an existing VLAN are changed. mlxsw_sp_bridge_port_vlan_add() calls mlxsw_sp_port_vlan_get() to get at the port_vlan in question, which implicitly references the object. This then leads to discrepancies in reference counting when the VLAN is removed. spectrum.c warns about the problem when the module is removed: [13578.493090] WARNING: CPU: 0 PID: 2454 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:2973 mlxsw_sp_port_remove+0xfd/0x110 [mlxsw_spectrum] [...] [13578.627106] Call Trace: [13578.629617] mlxsw_sp_fini+0x2a/0xe0 [mlxsw_spectrum] [13578.634748] mlxsw_core_bus_device_unregister+0x3e/0x130 [mlxsw_core] [13578.641290] mlxsw_pci_remove+0x13/0x40 [mlxsw_pci] [13578.646238] pci_device_remove+0x31/0xb0 [13578.650244] device_release_driver_internal+0x14f/0x220 [13578.655562] driver_detach+0x32/0x70 [13578.659183] bus_remove_driver+0x47/0xa0 [13578.663134] pci_unregister_driver+0x1e/0x80 [13578.667486] mlxsw_sp_module_exit+0xc/0x3fa [mlxsw_spectrum] [13578.673207] __x64_sys_delete_module+0x13b/0x1e0 [13578.677888] ? exit_to_usermode_loop+0x78/0x80 [13578.682374] do_syscall_64+0x39/0xe0 [13578.685976] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix by putting the port_vlan when mlxsw_sp_port_vlan_bridge_join() determines it's a flag-only change. Fixes: b3529af6bb0d ("spectrum: Reference count VLAN entries") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 42a6afcaae03..7924f241e3ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -912,8 +912,10 @@ mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, int err; /* No need to continue if only VLAN flags were changed */ - if (mlxsw_sp_port_vlan->bridge_port) + if (mlxsw_sp_port_vlan->bridge_port) { + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); return 0; + } err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port); if (err) -- GitLab From a45f5ee6850b35b8ecd81e8360b153ae685ecac7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jun 2018 15:27:34 -0700 Subject: [PATCH 0378/1291] kcov: ensure irq code sees a valid area [ Upstream commit c9484b986ef03492357fddd50afbdd02929cfa72 ] Patch series "kcov: fix unexpected faults". These patches fix a few issues where KCOV code could trigger recursive faults, discovered while debugging a patch enabling KCOV for arch/arm: * On CONFIG_PREEMPT kernels, there's a small race window where __sanitizer_cov_trace_pc() can see a bogus kcov_area. * Lazy faulting of the vmalloc area can cause mutual recursion between fault handling code and __sanitizer_cov_trace_pc(). * During the context switch, switching the mm can cause the kcov_area to be transiently unmapped. These are prerequisites for enabling KCOV on arm, but the issues themsevles are generic -- we just happen to avoid them by chance rather than design on x86-64 and arm64. This patch (of 3): For kernels built with CONFIG_PREEMPT, some C code may execute before or after the interrupt handler, while the hardirq count is zero. In these cases, in_task() can return true. A task can be interrupted in the middle of a KCOV_DISABLE ioctl while it resets the task's kcov data via kcov_task_init(). Instrumented code executed during this period will call __sanitizer_cov_trace_pc(), and as in_task() returns true, will inspect t->kcov_mode before trying to write to t->kcov_area. In kcov_init_task() we update t->kcov_{mode,area,size} with plain stores, which may be re-ordered, torn, etc. Thus __sanitizer_cov_trace_pc() may see bogus values for any of these fields, and may attempt to write to memory which is not mapped. Let's avoid this by using WRITE_ONCE() to set t->kcov_mode, with a barrier() to ensure this is ordered before we clear t->kov_{area,size}. This ensures that any code execute while kcov_init_task() is preempted will either see valid values for t->kcov_{area,size}, or will see that t->kcov_mode is KCOV_MODE_DISABLED, and bail out without touching t->kcov_area. Link: http://lkml.kernel.org/r/20180504135535.53744-2-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/kcov.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index b11ef6e51f7e..f1e060b04ef6 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -108,7 +108,8 @@ static void kcov_put(struct kcov *kcov) void kcov_task_init(struct task_struct *t) { - t->kcov_mode = KCOV_MODE_DISABLED; + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); t->kcov_size = 0; t->kcov_area = NULL; t->kcov = NULL; -- GitLab From 51b694070738e93ef319c464208a7f524b2b5b15 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 12 Jun 2018 08:57:53 +0200 Subject: [PATCH 0379/1291] xen/netfront: raise max number of slots in xennet_get_responses() [ Upstream commit 57f230ab04d2910a06d17d988f1c4d7586a59113 ] The max number of slots used in xennet_get_responses() is set to MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD). In old kernel-xen MAX_SKB_FRAGS was 18, while nowadays it is 17. This difference is resulting in frequent messages "too many slots" and a reduced network throughput for some workloads (factor 10 below that of a kernel-xen based guest). Replacing MAX_SKB_FRAGS by XEN_NETIF_NR_SLOTS_MIN for calculation of the max number of slots to use solves that problem (tests showed no more messages "too many slots" and throughput was as high as with the kernel-xen based guest system). Replace MAX_SKB_FRAGS-2 by XEN_NETIF_NR_SLOTS_MIN-1 in netfront_tx_slot_available() for making it clearer what is really being tested without actually modifying the tested value. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f07b9c9bb5ba..04e0765be5e7 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -239,7 +239,7 @@ static void rx_refill_timeout(unsigned long data) static int netfront_tx_slot_available(struct netfront_queue *queue) { return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) < - (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2); + (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1); } static void xennet_maybe_wake_tx(struct netfront_queue *queue) @@ -790,7 +790,7 @@ static int xennet_get_responses(struct netfront_queue *queue, RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *skb = xennet_get_rx_skb(queue, cons); grant_ref_t ref = xennet_get_rx_ref(queue, cons); - int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); + int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD); int slots = 1; int err = 0; unsigned long ret; -- GitLab From 0a84c912f575c083e2cdc123bcae93e0847f9e63 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jun 2018 12:44:55 -0700 Subject: [PATCH 0380/1291] hv_netvsc: fix network namespace issues with VF support [ Upstream commit 7bf7bb37f16a80465ee3bd7c6c966f96f5a075a6 ] When finding the parent netvsc device, the search needs to be across all netvsc device instances (independent of network namespace). Find parent device of VF using upper_dev_get routine which searches only adjacent list. Fixes: e8ff40d4bff1 ("hv_netvsc: improve VF device matching") Signed-off-by: Stephen Hemminger netns aware byref Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc_drv.c | 43 +++++++++++++++------------------ 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cb250cacf721..e33a6c672a0a 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -724,6 +724,8 @@ struct net_device_context { struct hv_device *device_ctx; /* netvsc_device */ struct netvsc_device __rcu *nvdev; + /* list of netvsc net_devices */ + struct list_head list; /* reconfigure work */ struct delayed_work dwork; /* last reconfig time */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index aeabeb107fed..6a77ef38c549 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -66,6 +66,8 @@ static int debug = -1; module_param(debug, int, S_IRUGO); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static LIST_HEAD(netvsc_dev_list); + static void netvsc_change_rx_flags(struct net_device *net, int change) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -1749,13 +1751,10 @@ static void netvsc_link_change(struct work_struct *w) static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device *dev; - - ASSERT_RTNL(); + struct net_device_context *ndev_ctx; - for_each_netdev(&init_net, dev) { - if (dev->netdev_ops != &device_ops) - continue; /* not a netvsc device */ + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx); if (ether_addr_equal(mac, dev->perm_addr)) return dev; @@ -1766,25 +1765,18 @@ static struct net_device *get_netvsc_bymac(const u8 *mac) static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) { + struct net_device_context *net_device_ctx; struct net_device *dev; - ASSERT_RTNL(); - - for_each_netdev(&init_net, dev) { - struct net_device_context *net_device_ctx; + dev = netdev_master_upper_dev_get(vf_netdev); + if (!dev || dev->netdev_ops != &device_ops) + return NULL; /* not a netvsc device */ - if (dev->netdev_ops != &device_ops) - continue; /* not a netvsc device */ + net_device_ctx = netdev_priv(dev); + if (!rtnl_dereference(net_device_ctx->nvdev)) + return NULL; /* device is removed */ - net_device_ctx = netdev_priv(dev); - if (!rtnl_dereference(net_device_ctx->nvdev)) - continue; /* device is removed */ - - if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) - return dev; /* a match */ - } - - return NULL; + return dev; } /* Called when VF is injecting data into network stack. @@ -2065,15 +2057,19 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; - ret = register_netdev(net); + rtnl_lock(); + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); goto register_failed; } - return ret; + list_add(&net_device_ctx->list, &netvsc_dev_list); + rtnl_unlock(); + return 0; register_failed: + rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: free_percpu(net_device_ctx->vf_stats); @@ -2119,6 +2115,7 @@ static int netvsc_remove(struct hv_device *dev) rndis_filter_device_remove(dev, nvdev); unregister_netdevice(net); + list_del(&ndev_ctx->list); rtnl_unlock(); rcu_read_unlock(); -- GitLab From 44a78f7d175792dbd376cbe77ce1abc3f82e5400 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 11 Jun 2018 15:32:06 -0400 Subject: [PATCH 0381/1291] skip LAYOUTRETURN if layout is invalid [ Upstream commit 93b7f7ad2018d2037559b1d0892417864c78b371 ] Currently, when IO to DS fails, client returns the layout and retries against the MDS. However, then on umounting (inode eviction) it returns the layout again. This is because pnfs_return_layout() was changed in commit d78471d32bb6 ("pnfs/blocklayout: set PNFS_LAYOUTRETURN_ON_ERROR") to always set NFS_LAYOUT_RETURN_REQUESTED so even if we returned the layout, it will be returned again. Instead, let's also check if we have already marked the layout invalid. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7b34534210ce..96867fb159bf 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1126,7 +1126,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0; - bool send; + bool send, valid_layout; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1147,6 +1147,7 @@ _pnfs_return_layout(struct inode *ino) goto out_put_layout_hdr; spin_lock(&ino->i_lock); } + valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); @@ -1160,7 +1161,8 @@ _pnfs_return_layout(struct inode *ino) } /* Don't send a LAYOUTRETURN if list was initially empty */ - if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || + !valid_layout) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); goto out_put_layout_hdr; -- GitLab From 40ff9a54dd9b7d587abb398e40147e9237eec5e3 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 16:18:40 +0800 Subject: [PATCH 0382/1291] ALSA: emu10k1: add error handling for snd_ctl_add [ Upstream commit 6d531e7b972cb62ded011c2dfcc2d9f72ea6c421 ] When snd_ctl_add fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling snd_ctl_add. Signed-off-by: Zhouyang Jia Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emupcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 2683b9717215..56be1630bd3e 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1850,7 +1850,9 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device) if (!kctl) return -ENOMEM; kctl->id.device = device; - snd_ctl_add(emu->card, kctl); + err = snd_ctl_add(emu->card, kctl); + if (err < 0) + return err; snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), 64*1024, 64*1024); -- GitLab From 42b1df406a296d28f0a86d6a73b4fdca446ae18e Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 16:04:06 +0800 Subject: [PATCH 0383/1291] ALSA: fm801: add error handling for snd_ctl_add [ Upstream commit ef1ffbe7889e99f5b5cccb41c89e5c94f50f3218 ] When snd_ctl_add fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling snd_ctl_add. Signed-off-by: Zhouyang Jia Acked-by: Andy Shevchenko Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/fm801.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 73a67bc3586b..e3fb9c61017c 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1068,11 +1068,19 @@ static int snd_fm801_mixer(struct fm801 *chip) if ((err = snd_ac97_mixer(chip->ac97_bus, &ac97, &chip->ac97_sec)) < 0) return err; } - for (i = 0; i < FM801_CONTROLS; i++) - snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls[i], chip)); + for (i = 0; i < FM801_CONTROLS; i++) { + err = snd_ctl_add(chip->card, + snd_ctl_new1(&snd_fm801_controls[i], chip)); + if (err < 0) + return err; + } if (chip->multichannel) { - for (i = 0; i < FM801_CONTROLS_MULTI; i++) - snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls_multi[i], chip)); + for (i = 0; i < FM801_CONTROLS_MULTI; i++) { + err = snd_ctl_add(chip->card, + snd_ctl_new1(&snd_fm801_controls_multi[i], chip)); + if (err < 0) + return err; + } } return 0; } -- GitLab From baad2bf4477003eb27b40a8c608d8af37b187917 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2018 19:10:31 -0400 Subject: [PATCH 0384/1291] NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY [ Upstream commit f9312a541050007ec59eb0106273a0a10718cd83 ] If the server returns NFS4ERR_SEQ_FALSE_RETRY or NFS4ERR_RETRY_UNCACHED_REP, then it thinks we're trying to replay an existing request. If so, then let's just bump the sequence ID and retry the operation. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 928bbc397818..df0455cf03af 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -745,6 +745,13 @@ static int nfs41_sequence_process(struct rpc_task *task, slot->slot_nr, slot->seq_nr); goto out_retry; + case -NFS4ERR_RETRY_UNCACHED_REP: + case -NFS4ERR_SEQ_FALSE_RETRY: + /* + * The server thinks we tried to replay a request. + * Retry the call after bumping the sequence ID. + */ + goto retry_new_seq; case -NFS4ERR_BADSLOT: /* * The slot id we used was probably retired. Try again @@ -769,10 +776,6 @@ static int nfs41_sequence_process(struct rpc_task *task, goto retry_nowait; } goto session_recover; - case -NFS4ERR_SEQ_FALSE_RETRY: - if (interrupted) - goto retry_new_seq; - goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; -- GitLab From 5a47fe3efd4dba912fce172e1f3e4b823cdf1c9d Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 8 Jun 2018 16:31:46 -0400 Subject: [PATCH 0385/1291] nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo [ Upstream commit 3171822fdcdd6e6d536047c425af6dc7a92dc585 ] When running a fuzz tester against a KASAN-enabled kernel, the following splat periodically occurs. The problem occurs when the test sends a GETDEVICEINFO request with a malformed xdr array (size but no data) for gdia_notify_types and the array size is > 0x3fffffff, which results in an overflow in the value of nbytes which is passed to read_buf(). If the array size is 0x40000000, 0x80000000, or 0xc0000000, then after the overflow occurs, the value of nbytes 0, and when that happens the pointer returned by read_buf() points to the end of the xdr data (i.e. argp->end) when really it should be returning NULL. Fix this by returning NFS4ERR_BAD_XDR if the array size is > 1000 (this value is arbitrary, but it's the same threshold used by nfsd4_decode_bitmap()... in could really be any value >= 1 since it's expected to get at most a single bitmap in gdia_notify_types). [ 119.256854] ================================================================== [ 119.257611] BUG: KASAN: use-after-free in nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.258422] Read of size 4 at addr ffff880113ada000 by task nfsd/538 [ 119.259146] CPU: 0 PID: 538 Comm: nfsd Not tainted 4.17.0+ #1 [ 119.259662] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 [ 119.261202] Call Trace: [ 119.262265] dump_stack+0x71/0xab [ 119.263371] print_address_description+0x6a/0x270 [ 119.264609] kasan_report+0x258/0x380 [ 119.265854] ? nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.267291] nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.268549] ? nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd] [ 119.269873] ? nfsd4_decode_sequence+0x490/0x490 [nfsd] [ 119.271095] nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd] [ 119.272393] ? nfsd4_release_compoundargs+0x1b0/0x1b0 [nfsd] [ 119.273658] nfsd_dispatch+0x183/0x850 [nfsd] [ 119.274918] svc_process+0x161c/0x31a0 [sunrpc] [ 119.276172] ? svc_printk+0x190/0x190 [sunrpc] [ 119.277386] ? svc_xprt_release+0x451/0x680 [sunrpc] [ 119.278622] nfsd+0x2b9/0x430 [nfsd] [ 119.279771] ? nfsd_destroy+0x1c0/0x1c0 [nfsd] [ 119.281157] kthread+0x2db/0x390 [ 119.282347] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 119.283756] ret_from_fork+0x35/0x40 [ 119.286041] Allocated by task 436: [ 119.287525] kasan_kmalloc+0xa0/0xd0 [ 119.288685] kmem_cache_alloc+0xe9/0x1f0 [ 119.289900] get_empty_filp+0x7b/0x410 [ 119.291037] path_openat+0xca/0x4220 [ 119.292242] do_filp_open+0x182/0x280 [ 119.293411] do_sys_open+0x216/0x360 [ 119.294555] do_syscall_64+0xa0/0x2f0 [ 119.295721] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 119.298068] Freed by task 436: [ 119.299271] __kasan_slab_free+0x130/0x180 [ 119.300557] kmem_cache_free+0x78/0x210 [ 119.301823] rcu_process_callbacks+0x35b/0xbd0 [ 119.303162] __do_softirq+0x192/0x5ea [ 119.305443] The buggy address belongs to the object at ffff880113ada000 which belongs to the cache filp of size 256 [ 119.308556] The buggy address is located 0 bytes inside of 256-byte region [ffff880113ada000, ffff880113ada100) [ 119.311376] The buggy address belongs to the page: [ 119.312728] page:ffffea00044eb680 count:1 mapcount:0 mapping:0000000000000000 index:0xffff880113ada780 [ 119.314428] flags: 0x17ffe000000100(slab) [ 119.315740] raw: 0017ffe000000100 0000000000000000 ffff880113ada780 00000001000c0001 [ 119.317379] raw: ffffea0004553c60 ffffea00045c11e0 ffff88011b167e00 0000000000000000 [ 119.319050] page dumped because: kasan: bad access detected [ 119.321652] Memory state around the buggy address: [ 119.322993] ffff880113ad9f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 119.324515] ffff880113ad9f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 119.326087] >ffff880113ada000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 119.327547] ^ [ 119.328730] ffff880113ada080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 119.330218] ffff880113ada100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 119.331740] ================================================================== Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f6588cc6816c..c1e923334012 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1586,6 +1586,8 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, gdev->gd_maxcount = be32_to_cpup(p++); num = be32_to_cpup(p++); if (num) { + if (num > 1000) + goto xdr_error; READ_BUF(4 * num); gdev->gd_notify_types = be32_to_cpup(p++); for (i = 1; i < num; i++) { -- GitLab From ca014df110e97a4765f7e581376b414f26496db7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Apr 2018 11:15:48 +0200 Subject: [PATCH 0386/1291] vfio: platform: Fix reset module leak in error path [ Upstream commit 28a68387888997e8a7fa57940ea5d55f2e16b594 ] If the IOMMU group setup fails, the reset module is not released. Fixes: b5add544d677d363 ("vfio, platform: make reset driver a requirement by default") Signed-off-by: Geert Uytterhoeven Reviewed-by: Eric Auger Reviewed-by: Simon Horman Acked-by: Eric Auger Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/platform/vfio_platform_common.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 4c27f4be3c3d..aa9e792110e3 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -681,18 +681,23 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, group = vfio_iommu_group_get(dev); if (!group) { pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); - return -EINVAL; + ret = -EINVAL; + goto put_reset; } ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); - if (ret) { - vfio_iommu_group_put(group, dev); - return ret; - } + if (ret) + goto put_iommu; mutex_init(&vdev->igate); return 0; + +put_iommu: + vfio_iommu_group_put(group, dev); +put_reset: + vfio_platform_put_reset(vdev); + return ret; } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); -- GitLab From 8f38152f2ae22b09589a6160086f5d71881ae30b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 May 2018 13:53:55 -0600 Subject: [PATCH 0387/1291] vfio/mdev: Check globally for duplicate devices [ Upstream commit 002fe996f67f4f46d8917b14cfb6e4313c20685a ] When we create an mdev device, we check for duplicates against the parent device and return -EEXIST if found, but the mdev device namespace is global since we'll link all devices from the bus. We do catch this later in sysfs_do_create_link_sd() to return -EEXIST, but with it comes a kernel warning and stack trace for trying to create duplicate sysfs links, which makes it an undesirable response. Therefore we should really be looking for duplicates across all mdev parent devices, or as implemented here, against our mdev device list. Using mdev_list to prevent duplicates means that we can remove mdev_parent.lock, but in order not to serialize mdev device creation and removal globally, we add mdev_device.active which allows UUIDs to be reserved such that we can drop the mdev_list_lock before the mdev device is fully in place. Two behavioral notes; first, mdev_parent.lock had the side-effect of serializing mdev create and remove ops per parent device. This was an implementation detail, not an intentional guarantee provided to the mdev vendor drivers. Vendor drivers can trivially provide this serialization internally if necessary. Second, review comments note the new -EAGAIN behavior when the device, and in particular the remove attribute, becomes visible in sysfs. If a remove is triggered prior to completion of mdev_device_create() the user will see a -EAGAIN error. While the errno is different, receiving an error during this period is not, the previous implementation returned -ENODEV for the same condition. Furthermore, the consistency to the user is improved in the case where mdev_device_remove_ops() returns error. Previously concurrent calls to mdev_device_remove() could see the device disappear with -ENODEV and return in the case of error. Now a user would see -EAGAIN while the device is in this transitory state. Reviewed-by: Kirti Wankhede Reviewed-by: Cornelia Huck Acked-by: Halil Pasic Acked-by: Zhenyu Wang Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/vfio-mediated-device.txt | 5 ++ drivers/vfio/mdev/mdev_core.c | 102 +++++++++---------------- drivers/vfio/mdev/mdev_private.h | 2 +- 3 files changed, 42 insertions(+), 67 deletions(-) diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index 1b3950346532..c3f69bcaf96e 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows: * create: allocate basic resources in a driver for a mediated device * remove: free resources in a driver when a mediated device is destroyed +(Note that mdev-core provides no implicit serialization of create/remove +callbacks per mdev parent device, per mdev type, or any other categorization. +Vendor drivers are expected to be fully asynchronous in this respect or +provide their own internal resource protection.) + The callbacks in the mdev_parent_ops structure are as follows: * open: open callback of mediated device diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 126991046eb7..0212f0ee8aea 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev) } EXPORT_SYMBOL(mdev_uuid); -static int _find_mdev_device(struct device *dev, void *data) -{ - struct mdev_device *mdev; - - if (!dev_is_mdev(dev)) - return 0; - - mdev = to_mdev_device(dev); - - if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0) - return 1; - - return 0; -} - -static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) -{ - struct device *dev; - - dev = device_find_child(parent->dev, &uuid, _find_mdev_device); - if (dev) { - put_device(dev); - return true; - } - - return false; -} - /* Should be called holding parent_list_lock */ static struct mdev_parent *__find_parent_device(struct device *dev) { @@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) } kref_init(&parent->ref); - mutex_init(&parent->lock); parent->dev = dev; parent->ops = ops; @@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev) { struct mdev_device *mdev = to_mdev_device(dev); + mutex_lock(&mdev_list_lock); + list_del(&mdev->next); + mutex_unlock(&mdev_list_lock); + dev_dbg(&mdev->dev, "MDEV: destroying\n"); kfree(mdev); } @@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev) int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) { int ret; - struct mdev_device *mdev; + struct mdev_device *mdev, *tmp; struct mdev_parent *parent; struct mdev_type *type = to_mdev_type(kobj); @@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) if (!parent) return -EINVAL; - mutex_lock(&parent->lock); + mutex_lock(&mdev_list_lock); /* Check for duplicate */ - if (mdev_device_exist(parent, uuid)) { - ret = -EEXIST; - goto create_err; + list_for_each_entry(tmp, &mdev_list, next) { + if (!uuid_le_cmp(tmp->uuid, uuid)) { + mutex_unlock(&mdev_list_lock); + ret = -EEXIST; + goto mdev_fail; + } } mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) { + mutex_unlock(&mdev_list_lock); ret = -ENOMEM; - goto create_err; + goto mdev_fail; } memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + mdev->parent = parent; kref_init(&mdev->ref); @@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) ret = device_register(&mdev->dev); if (ret) { put_device(&mdev->dev); - goto create_err; + goto mdev_fail; } ret = mdev_device_create_ops(kobj, mdev); if (ret) - goto create_failed; + goto create_fail; ret = mdev_create_sysfs_files(&mdev->dev, type); if (ret) { mdev_device_remove_ops(mdev, true); - goto create_failed; + goto create_fail; } mdev->type_kobj = kobj; + mdev->active = true; dev_dbg(&mdev->dev, "MDEV: created\n"); - mutex_unlock(&parent->lock); - - mutex_lock(&mdev_list_lock); - list_add(&mdev->next, &mdev_list); - mutex_unlock(&mdev_list_lock); - - return ret; + return 0; -create_failed: +create_fail: device_unregister(&mdev->dev); - -create_err: - mutex_unlock(&parent->lock); +mdev_fail: mdev_put_parent(parent); return ret; } @@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove) struct mdev_parent *parent; struct mdev_type *type; int ret; - bool found = false; mdev = to_mdev_device(dev); mutex_lock(&mdev_list_lock); list_for_each_entry(tmp, &mdev_list, next) { - if (tmp == mdev) { - found = true; + if (tmp == mdev) break; - } } - if (found) - list_del(&mdev->next); + if (tmp != mdev) { + mutex_unlock(&mdev_list_lock); + return -ENODEV; + } - mutex_unlock(&mdev_list_lock); + if (!mdev->active) { + mutex_unlock(&mdev_list_lock); + return -EAGAIN; + } - if (!found) - return -ENODEV; + mdev->active = false; + mutex_unlock(&mdev_list_lock); type = to_mdev_type(mdev->type_kobj); parent = mdev->parent; - mutex_lock(&parent->lock); ret = mdev_device_remove_ops(mdev, force_remove); if (ret) { - mutex_unlock(&parent->lock); - - mutex_lock(&mdev_list_lock); - list_add(&mdev->next, &mdev_list); - mutex_unlock(&mdev_list_lock); - + mdev->active = true; return ret; } mdev_remove_sysfs_files(dev, type); device_unregister(dev); - mutex_unlock(&parent->lock); mdev_put_parent(parent); return 0; diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index a9cefd70a705..b5819b7d7ef7 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -20,7 +20,6 @@ struct mdev_parent { struct device *dev; const struct mdev_parent_ops *ops; struct kref ref; - struct mutex lock; struct list_head next; struct kset *mdev_types_kset; struct list_head type_list; @@ -34,6 +33,7 @@ struct mdev_device { struct kref ref; struct list_head next; struct kobject *type_kobj; + bool active; }; #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) -- GitLab From 827faa4eb5668e25baf7f3752dc7ae7fd46894c2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 11 May 2018 09:05:02 -0600 Subject: [PATCH 0388/1291] vfio/type1: Fix task tracking for QEMU vCPU hotplug [ Upstream commit 48d8476b41eed63567dd2f0ad125c895b9ac648a ] MAP_DMA ioctls might be called from various threads within a process, for example when using QEMU, the vCPU threads are often generating these calls and we therefore take a reference to that vCPU task. However, QEMU also supports vCPU hotplug on some machines and the task that called MAP_DMA may have exited by the time UNMAP_DMA is called, resulting in the mm_struct pointer being NULL and thus a failure to match against the existing mapping. To resolve this, we instead take a reference to the thread group_leader, which has the same mm_struct and resource limits, but is less likely exit, at least in the QEMU case. A difficulty here is guaranteeing that the capabilities of the group_leader match that of the calling thread, which we resolve by tracking CAP_IPC_LOCK at the time of calling rather than at an indeterminate time in the future. Potentially this also results in better efficiency as this is now recorded once per MAP_DMA ioctl. Reported-by: Xu Yandong Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 73 +++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d639378e36ac..50eeb74ddc0a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -83,6 +83,7 @@ struct vfio_dma { size_t size; /* Map size (bytes) */ int prot; /* IOMMU_READ/WRITE */ bool iommu_mapped; + bool lock_cap; /* capable(CAP_IPC_LOCK) */ struct task_struct *task; struct rb_root pfn_list; /* Ex-user pinned pfn list */ }; @@ -246,29 +247,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) return ret; } -static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) +static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) { struct mm_struct *mm; - bool is_current; int ret; if (!npage) return 0; - is_current = (task->mm == current->mm); - - mm = is_current ? task->mm : get_task_mm(task); + mm = async ? get_task_mm(dma->task) : dma->task->mm; if (!mm) return -ESRCH; /* process exited */ ret = down_write_killable(&mm->mmap_sem); if (!ret) { if (npage > 0) { - if (lock_cap ? !*lock_cap : - !has_capability(task, CAP_IPC_LOCK)) { + if (!dma->lock_cap) { unsigned long limit; - limit = task_rlimit(task, + limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (mm->locked_vm + npage > limit) @@ -282,7 +279,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) up_write(&mm->mmap_sem); } - if (!is_current) + if (async) mmput(mm); return ret; @@ -391,7 +388,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base, - bool lock_cap, unsigned long limit) + unsigned long limit) { unsigned long pfn = 0; long ret, pinned = 0, lock_acct = 0; @@ -414,7 +411,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, * pages are already counted against the user. */ if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && current->mm->locked_vm + 1 > limit) { + if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) { put_pfn(*pfn_base, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); @@ -440,7 +437,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && + if (!dma->lock_cap && current->mm->locked_vm + lock_acct + 1 > limit) { put_pfn(pfn, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", @@ -453,7 +450,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } out: - ret = vfio_lock_acct(current, lock_acct, &lock_cap); + ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: if (ret) { @@ -484,7 +481,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, } if (do_accounting) - vfio_lock_acct(dma->task, locked - unlocked, NULL); + vfio_lock_acct(dma, locked - unlocked, true); return unlocked; } @@ -501,7 +498,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { - ret = vfio_lock_acct(dma->task, 1, NULL); + ret = vfio_lock_acct(dma, 1, true); if (ret) { put_pfn(*pfn_base, dma->prot); if (ret == -ENOMEM) @@ -528,7 +525,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, unlocked = vfio_iova_put_vfio_pfn(dma, vpfn); if (do_accounting) - vfio_lock_acct(dma->task, -unlocked, NULL); + vfio_lock_acct(dma, -unlocked, true); return unlocked; } @@ -723,7 +720,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; if (do_accounting) { - vfio_lock_acct(dma->task, -unlocked, NULL); + vfio_lock_acct(dma, -unlocked, true); return 0; } return unlocked; @@ -935,14 +932,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, size_t size = map_size; long npage; unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); int ret = 0; while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, - size >> PAGE_SHIFT, &pfn, - lock_cap, limit); + size >> PAGE_SHIFT, &pfn, limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1017,8 +1012,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, dma->iova = iova; dma->vaddr = vaddr; dma->prot = prot; - get_task_struct(current); - dma->task = current; + + /* + * We need to be able to both add to a task's locked memory and test + * against the locked memory limit and we need to be able to do both + * outside of this call path as pinning can be asynchronous via the + * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a + * task_struct and VM locked pages requires an mm_struct, however + * holding an indefinite mm reference is not recommended, therefore we + * only hold a reference to a task. We could hold a reference to + * current, however QEMU uses this call path through vCPU threads, + * which can be killed resulting in a NULL mm and failure in the unmap + * path when called via a different thread. Avoid this problem by + * using the group_leader as threads within the same group require + * both CLONE_THREAD and CLONE_VM and will therefore use the same + * mm_struct. + * + * Previously we also used the task for testing CAP_IPC_LOCK at the + * time of pinning and accounting, however has_capability() makes use + * of real_cred, a copy-on-write field, so we can't guarantee that it + * matches group_leader, or in fact that it might not change by the + * time it's evaluated. If a process were to call MAP_DMA with + * CAP_IPC_LOCK but later drop it, it doesn't make sense that they + * possibly see different results for an iommu_mapped vfio_dma vs + * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the + * time of calling MAP_DMA. + */ + get_task_struct(current->group_leader); + dma->task = current->group_leader; + dma->lock_cap = capable(CAP_IPC_LOCK); + dma->pfn_list = RB_ROOT; /* Insert zero-sized and grow as we map chunks of it */ @@ -1053,7 +1076,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *d; struct rb_node *n; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); int ret; /* Arbitrarily pick the first domain in the list for lookups */ @@ -1100,8 +1122,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, npage = vfio_pin_pages_remote(dma, vaddr, n >> PAGE_SHIFT, - &pfn, lock_cap, - limit); + &pfn, limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1378,7 +1399,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) if (!is_invalid_reserved_pfn(vpfn->pfn)) locked++; } - vfio_lock_acct(dma->task, locked - unlocked, NULL); + vfio_lock_acct(dma, locked - unlocked, true); } } -- GitLab From 9691035cbf723c8ce7247eb5889d03b658871492 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 7 Jun 2018 17:10:34 -0700 Subject: [PATCH 0389/1291] kernel/hung_task.c: show all hung tasks before panic [ Upstream commit 401c636a0eeb0d51862fce222da1bf08e3a0ffd0 ] When we get a hung task it can often be valuable to see _all_ the hung tasks on the system before calling panic(). Quoting from https://syzkaller.appspot.com/text?tag=CrashReport&id=5316056503549952 ---------------------------------------- INFO: task syz-executor0:6540 blocked for more than 120 seconds. Not tainted 4.16.0+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor0 D23560 6540 4521 0x80000004 Call Trace: context_switch kernel/sched/core.c:2848 [inline] __schedule+0x8fb/0x1ef0 kernel/sched/core.c:3490 schedule+0xf5/0x430 kernel/sched/core.c:3549 schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3607 __mutex_lock_common kernel/locking/mutex.c:833 [inline] __mutex_lock+0xb7f/0x1810 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0x1759/0x1e00 block/ioctl.c:601 ioctl_by_bdev+0xa5/0x110 fs/block_dev.c:2060 isofs_get_last_session fs/isofs/inode.c:567 [inline] isofs_fill_super+0x2ba9/0x3bc0 fs/isofs/inode.c:660 mount_bdev+0x2b7/0x370 fs/super.c:1119 isofs_mount+0x34/0x40 fs/isofs/inode.c:1560 mount_fs+0x66/0x2d0 fs/super.c:1222 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:2514 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0xea4/0x2b90 fs/namespace.c:2847 ksys_mount+0xab/0x120 fs/namespace.c:3063 SYSC_mount fs/namespace.c:3077 [inline] SyS_mount+0x39/0x50 fs/namespace.c:3074 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 (...snipped...) Showing all locks held in the system: (...snipped...) 2 locks held by syz-executor0/6540: #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: alloc_super fs/super.c:211 [inline] #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: sget_userns+0x3b2/0xe60 fs/super.c:502 /* down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); */ #1: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ (...snipped...) 3 locks held by syz-executor7/6541: #0: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ #1: 000000007bf3d3f9 (&bdev->bd_mutex){+.+.}, at: blkdev_reread_part+0x1e/0x40 block/ioctl.c:192 #2: 00000000566d4c39 (&type->s_umount_key#50){.+.+}, at: __get_super.part.10+0x1d3/0x280 fs/super.c:663 /* down_read(&sb->s_umount); */ ---------------------------------------- When reporting an AB-BA deadlock like shown above, it would be nice if trace of PID=6541 is printed as well as trace of PID=6540 before calling panic(). Showing hung tasks up to /proc/sys/kernel/hung_task_warnings could delay calling panic() but normally there should not be so many hung tasks. Link: http://lkml.kernel.org/r/201804050705.BHE57833.HVFOFtSOMQJFOL@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Paul E. McKenney Acked-by: Dmitry Vyukov Cc: Vegard Nossum Cc: Mandeep Singh Baines Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/hung_task.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 751593ed7c0b..32b479468e4d 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -44,6 +44,7 @@ int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; static bool hung_task_show_lock; +static bool hung_task_call_panic; static struct task_struct *watchdog_task; @@ -127,10 +128,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); if (sysctl_hung_task_panic) { - if (hung_task_show_lock) - debug_show_all_locks(); - trigger_all_cpu_backtrace(); - panic("hung_task: blocked tasks"); + hung_task_show_lock = true; + hung_task_call_panic = true; } } @@ -193,6 +192,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) rcu_read_unlock(); if (hung_task_show_lock) debug_show_all_locks(); + if (hung_task_call_panic) { + trigger_all_cpu_backtrace(); + panic("hung_task: blocked tasks"); + } } static long hung_timeout_jiffies(unsigned long last_checked, -- GitLab From 9e1a1fc0cd9be4d82bd9e0c1f8618c4e13ee2530 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 7 Jun 2018 17:07:39 -0700 Subject: [PATCH 0390/1291] mm: /proc/pid/pagemap: hide swap entries from unprivileged users [ Upstream commit ab6ecf247a9321e3180e021a6a60164dee53ab2e ] In commit ab676b7d6fbf ("pagemap: do not leak physical addresses to non-privileged userspace"), the /proc/PID/pagemap is restricted to be readable only by CAP_SYS_ADMIN to address some security issue. In commit 1c90308e7a77 ("pagemap: hide physical addresses from non-privileged users"), the restriction is relieved to make /proc/PID/pagemap readable, but hide the physical addresses for non-privileged users. But the swap entries are readable for non-privileged users too. This has some security issues. For example, for page under migrating, the swap entry has physical address information. So, in this patch, the swap entries are hided for non-privileged users too. Link: http://lkml.kernel.org/r/20180508012745.7238-1-ying.huang@intel.com Fixes: 1c90308e7a77 ("pagemap: hide physical addresses from non-privileged users") Signed-off-by: "Huang, Ying" Suggested-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Reviewed-by: Konstantin Khlebnikov Acked-by: Michal Hocko Cc: Konstantin Khlebnikov Cc: Andrei Vagin Cc: Jerome Glisse Cc: Daniel Colascione Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6f337fff38c4..519522d39bde 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1275,8 +1275,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; entry = pte_to_swp_entry(pte); - frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; if (is_migration_entry(entry)) page = migration_entry_to_page(entry); @@ -1327,11 +1328,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); - unsigned long offset = swp_offset(entry); + unsigned long offset; - offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; - frame = swp_type(entry) | - (offset << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) { + offset = swp_offset(entry) + + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + frame = swp_type(entry) | + (offset << MAX_SWAPFILES_SHIFT); + } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; @@ -1349,10 +1353,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, err = add_to_pagemap(addr, &pme, pm); if (err) break; - if (pm->show_pfn && (flags & PM_PRESENT)) - frame++; - else if (flags & PM_SWAP) - frame += (1 << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) { + if (flags & PM_PRESENT) + frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); + } } spin_unlock(ptl); return err; -- GitLab From b9d1724cf618cea52e268a61b28096d8b3a422e4 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Thu, 7 Jun 2018 17:06:50 -0700 Subject: [PATCH 0391/1291] mm: vmalloc: avoid racy handling of debugobjects in vunmap [ Upstream commit f3c01d2f3ade6790db67f80fef60df84424f8964 ] Currently, __vunmap flow is, 1) Release the VM area 2) Free the debug objects corresponding to that vm area. This leave some race window open. 1) Release the VM area 1.5) Some other client gets the same vm area 1.6) This client allocates new debug objects on the same vm area 2) Free the debug objects corresponding to this vm area. Here, we actually free 'other' client's debug objects. Fix this by freeing the debug objects first and then releasing the VM area. Link: http://lkml.kernel.org/r/1523961828-9485-2-git-send-email-cpandya@codeaurora.org Signed-off-by: Chintan Pandya Reviewed-by: Andrew Morton Cc: Ard Biesheuvel Cc: Byungchul Park Cc: Catalin Marinas Cc: Florian Fainelli Cc: Johannes Weiner Cc: Laura Abbott Cc: Vlastimil Babka Cc: Wei Yang Cc: Yisheng Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ebff729cc956..9ff21a12ea00 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1519,7 +1519,7 @@ static void __vunmap(const void *addr, int deallocate_pages) addr)) return; - area = remove_vm_area(addr); + area = find_vmap_area((unsigned long)addr)->vm; if (unlikely(!area)) { WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr); @@ -1529,6 +1529,7 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(addr, get_vm_area_size(area)); debug_check_no_obj_freed(addr, get_vm_area_size(area)); + remove_vm_area(addr); if (deallocate_pages) { int i; -- GitLab From 191d00f8c8726320814ad4796c1818432edacb2c Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 7 Jun 2018 17:05:17 -0700 Subject: [PATCH 0392/1291] mm/slub.c: add __printf verification to slab_err() [ Upstream commit a38965bf941b7c2af50de09c96bc5f03e136caef ] __printf is useful to verify format and arguments. Remove the following warning (with W=1): mm/slub.c:721:2: warning: function might be possible candidate for `gnu_printf' format attribute [-Wsuggest-attribute=format] Link: http://lkml.kernel.org/r/20180505200706.19986-1-malat@debian.org Signed-off-by: Mathieu Malaterre Reviewed-by: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index c38e71cea6d3..10e54c4acd19 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -708,7 +708,7 @@ void object_err(struct kmem_cache *s, struct page *page, print_trailer(s, page, object); } -static void slab_err(struct kmem_cache *s, struct page *page, +static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...) { va_list args; -- GitLab From 6a929b97b7770573728611551568a4399e3c2da7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 5 Jun 2018 23:09:14 +0200 Subject: [PATCH 0393/1291] rtc: ensure rtc_set_alarm fails when alarms are not supported [ Upstream commit abfdff44bc38e9e2ef7929f633fb8462632299d4 ] When using RTC_ALM_SET or RTC_WKALM_SET with rtc_wkalrm.enabled not set, rtc_timer_enqueue() is not called and rtc_set_alarm() may succeed but the subsequent RTC_AIE_ON ioctl will fail. RTC_ALM_READ would also fail in that case. Ensure rtc_set_alarm() fails when alarms are not supported to avoid letting programs think the alarms are working for a particular RTC when they are not. Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/interface.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 9eb32ead63db..e4f951e968a4 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -359,6 +359,11 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; + if (!rtc->ops) + return -ENODEV; + else if (!rtc->ops->set_alarm) + return -EINVAL; + err = rtc_valid_tm(&alarm->time); if (err != 0) return err; -- GitLab From a47ece2b17c2e28524f8434857e0deab72ada33e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Jun 2018 14:14:16 +0200 Subject: [PATCH 0394/1291] perf tools: Fix pmu events parsing rule [ Upstream commit ceac7b79df7bd67ef9aaf464b0179a2686aff4ee ] Currently all the event parsing fails end up in the event_pmu rule, and display misleading help like: $ perf stat -e inst kill event syntax error: 'inst' \___ Cannot find PMU `inst'. Missing kernel support? ... The reason is that the event_pmu is too strong and match also single string. Changing it to force the '/' separators to be part of the rule, and getting the proper error now: $ perf stat -e inst kill event syntax error: 'inst' \___ parser error Run 'perf list' for a list of valid events ... Suggested-by: Adrian Hunter Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180605121416.31645-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/parse-events.y | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index e81a20ea8d7d..988310cd3049 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -72,6 +72,7 @@ static void inc_group_count(struct list_head *list, %type value_sym %type event_config %type opt_event_config +%type opt_pmu_config %type event_term %type event_pmu %type event_legacy_symbol @@ -223,7 +224,7 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME opt_event_config +PE_NAME opt_pmu_config { struct list_head *list, *orig_terms, *terms; @@ -486,6 +487,17 @@ opt_event_config: $$ = NULL; } +opt_pmu_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_state *parse_state = _parse_state; -- GitLab From 52f072f580e4e5acfc65e40273e1cd4b544fbe63 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Mon, 4 Jun 2018 16:51:19 +0200 Subject: [PATCH 0395/1291] netfilter: ipset: forbid family for hash:mac sets [ Upstream commit cbdebe481a14b42c45aa9f4ceb5ff19b55de2c57 ] Userspace `ipset` command forbids family option for hash:mac type: ipset create test hash:mac family inet4 ipset v6.30: Unknown argument: `family' However, this check is not done in kernel itself. When someone use external netlink applications (pyroute2 python library for example), one can create hash:mac with invalid family and inconsistant results from userspace (`ipset` command cannot read set content anymore). This patch enforce the logic in kernel, and forbids insertion of hash:mac with a family set. Since IP_SET_PROTO_UNDEF is defined only for hash:mac, this patch has no impact on other hash:* sets Signed-off-by: Florent Fourcot Signed-off-by: Victorien Molle Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_hash_gen.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 51063d9ed0f7..dfd268166e42 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1241,7 +1241,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, pr_debug("Create set %s with family %s\n", set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); -#ifndef IP_SET_PROTO_UNDEF +#ifdef IP_SET_PROTO_UNDEF + if (set->family != NFPROTO_UNSPEC) + return -IPSET_ERR_INVALID_FAMILY; +#else if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; #endif -- GitLab From ff60eda504531b7730435f1730c8bf068a209221 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 31 May 2018 18:45:21 +0200 Subject: [PATCH 0396/1291] netfilter: ipset: List timing out entries with "timeout 1" instead of zero [ Upstream commit bd975e691486ba52790ba23cc9b4fecab7bc0d31 ] When listing sets with timeout support, there's a probability that just timing out entries with "0" timeout value is listed/saved. However when restoring the saved list, the zero timeout value means permanent elelements. The new behaviour is that timing out entries are listed with "timeout 1" instead of zero. Fixes netfilter bugzilla #1258. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index bfb3531fd88a..7ad8ddf9ca8a 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -65,8 +65,14 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) static inline u32 ip_set_timeout_get(const unsigned long *timeout) { - return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + u32 t; + + if (*timeout == IPSET_ELEM_PERMANENT) + return 0; + + t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + /* Zero value in userspace means no timeout */ + return t == 0 ? 1 : t; } #endif /* __KERNEL__ */ -- GitLab From d90c9b07cb325b5190f5a45ddcf66aa09ed5f552 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Tue, 5 Jun 2018 15:27:27 +0300 Subject: [PATCH 0397/1291] irqchip/ls-scfg-msi: Map MSIs in the iommu [ Upstream commit 0cdd431c337e99177e68597f3de34bedd3a20a74 ] Add the required iommu_dma_map_msi_msg() when composing the MSI message, otherwise the interrupts will not work. Signed-off-by: Laurentiu Tudor Signed-off-by: Thomas Gleixner Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: zhiqiang.hou@nxp.com Cc: minghuan.lian@nxp.com Link: https://lkml.kernel.org/r/20180605122727.12831-1-laurentiu.tudor@nxp.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-ls-scfg-msi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 119f4ef0d421..b7f943f96068 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MSI_IRQS_PER_MSIR 32 #define MSI_MSIR_OFFSET 4 @@ -94,6 +95,8 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) if (msi_affinity_flag) msg->data |= cpumask_first(data->common->affinity); + + iommu_dma_map_msi_msg(data->irq, msg); } static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, -- GitLab From 2f6a38b131abcb447e32cf5abaac12b6d5e9131d Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 28 May 2018 08:45:45 +0200 Subject: [PATCH 0398/1291] watchdog: da9063: Fix updating timeout value [ Upstream commit 44ee54aabfdb3b35866ed909bde3ab01e9679385 ] The DA9063 watchdog has only one register field to store the timeout value and to enable the watchdog. The watchdog gets enabled if the value is not zero. There is no issue if the watchdog is already running but it leads into problems if the watchdog is disabled. If the watchdog is disabled and only the timeout value should be prepared the watchdog gets enabled too. Add a check to get the current watchdog state and update the watchdog timeout value on hw-side only if the watchdog is already active. Fixes: 5e9c16e37608 ("watchdog: Add DA9063 PMIC watchdog driver.") Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/da9063_wdt.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index 2a20fc163ed0..4c62ad74aec0 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -102,10 +102,23 @@ static int da9063_wdt_set_timeout(struct watchdog_device *wdd, { struct da9063 *da9063 = watchdog_get_drvdata(wdd); unsigned int selector; - int ret; + int ret = 0; selector = da9063_wdt_timeout_to_sel(timeout); - ret = _da9063_wdt_set_timeout(da9063, selector); + + /* + * There are two cases when a set_timeout() will be called: + * 1. The watchdog is off and someone wants to set the timeout for the + * further use. + * 2. The watchdog is already running and a new timeout value should be + * set. + * + * The watchdog can't store a timeout value not equal zero without + * enabling the watchdog, so the timeout must be buffered by the driver. + */ + if (watchdog_active(wdd)) + ret = _da9063_wdt_set_timeout(da9063, selector); + if (ret) dev_err(da9063->dev, "Failed to set watchdog timeout (err = %d)\n", ret); -- GitLab From 8dcf2dbf65efd17512db17526909de05daa0cff2 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 30 May 2018 16:03:50 +0900 Subject: [PATCH 0399/1291] printk: drop in_nmi check from printk_safe_flush_on_panic() [ Upstream commit 554755be08fba31c74f66b82a485e5513205af84 ] Drop the in_nmi() check from printk_safe_flush_on_panic() and attempt to re-init (IOW unlock) locked logbuf spinlock from panic CPU regardless of its context. Otherwise, theoretically, we can deadlock on logbuf trying to flush per-CPU buffers: a) Panic CPU is running in non-NMI context b) Panic CPU sends out shutdown IPI via reboot vector c) Panic CPU fails to stop all remote CPUs d) Panic CPU sends out shutdown IPI via NMI vector One of the CPUs that we bring down via NMI vector can hold logbuf spin lock (theoretically). Link: http://lkml.kernel.org/r/20180530070350.10131-1-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk_safe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index d989cc238198..64825b2df3a5 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -284,7 +284,7 @@ void printk_safe_flush_on_panic(void) * Make sure that we could access the main ring buffer. * Do not risk a double release when more CPUs are up. */ - if (in_nmi() && raw_spin_is_locked(&logbuf_lock)) { + if (raw_spin_is_locked(&logbuf_lock)) { if (num_online_cpus() > 1) return; -- GitLab From cdad03c1f21e039affc8a709e5e982964d397039 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Fri, 11 May 2018 10:52:17 +0800 Subject: [PATCH 0400/1291] bpf, arm32: fix inconsistent naming about emit_a32_lsr_{r64,i64} [ Upstream commit 68565a1af9f7012e6f2fe2bdd612f67d2d830c28 ] The names for BPF_ALU64 | BPF_ARSH are emit_a32_arsh_*, the names for BPF_ALU64 | BPF_LSH are emit_a32_lsh_*, but the names for BPF_ALU64 | BPF_RSH are emit_a32_lsr_*. For consistence reason, let's rename emit_a32_lsr_* to emit_a32_rsh_*. This patch also corrects a wrong comment. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Wang YanQing Cc: Shubham Bansal Cc: linux-arm-kernel@lists.infradead.org Cc: linux@armlinux.org.uk Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/net/bpf_jit_32.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 323a4df59a6c..ece2d1d43724 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -718,7 +718,7 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst >> src */ -static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { const u8 *tmp = bpf2a32[TMP_REG_1]; const u8 *tmp2 = bpf2a32[TMP_REG_2]; @@ -734,7 +734,7 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } - /* Do LSH operation */ + /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); @@ -784,7 +784,7 @@ static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, } /* dst = dst >> val */ -static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx) { const u8 *tmp = bpf2a32[TMP_REG_1]; const u8 *tmp2 = bpf2a32[TMP_REG_2]; @@ -1340,7 +1340,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_RSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_lsr_i64(dst, dstk, imm, ctx); + emit_a32_rsh_i64(dst, dstk, imm, ctx); break; /* dst = dst << src */ case BPF_ALU64 | BPF_LSH | BPF_X: @@ -1348,7 +1348,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* dst = dst >> src */ case BPF_ALU64 | BPF_RSH | BPF_X: - emit_a32_lsr_r64(dst, src, dstk, sstk, ctx); + emit_a32_rsh_r64(dst, src, dstk, sstk, ctx); break; /* dst = dst >> src (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_X: -- GitLab From 4c717e335ae14e5479bf6017a9466cbb0648266e Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 30 May 2018 10:13:11 +0800 Subject: [PATCH 0401/1291] ceph: fix alignment of rasize [ Upstream commit c36ed50de2ad1649ce0369a4a6fc2cc11b20dfb7 ] On currently logic: when I specify rasize=0~1 then it will be 4096. when I specify rasize=2~4097 then it will be 8192. Make it the same as rsize & wsize. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 48ffe720bf09..b79b1211a2b5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -254,7 +254,7 @@ static int parse_fsopt_token(char *c, void *private) case Opt_rasize: if (intval < 0) return -EINVAL; - fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE); + fsopt->rasize = ALIGN(intval, PAGE_SIZE); break; case Opt_caps_wanted_delay_min: if (intval < 1) -- GitLab From 848f260301d0116a625eb2580956c3119790f29d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 10 May 2018 16:28:35 +0900 Subject: [PATCH 0402/1291] e1000e: Ignore TSYNCRXCTL when getting I219 clock attributes [ Upstream commit fff200caf6f9179dd9a7fc67acd659e614c3f72f ] There have been multiple reports of crashes that look like kernel: RIP: 0010:[] timecounter_read+0xf/0x50 [...] kernel: Call Trace: kernel: [] e1000e_phc_gettime+0x2f/0x60 [e1000e] kernel: [] e1000e_systim_overflow_work+0x1d/0x80 [e1000e] kernel: [] process_one_work+0x155/0x440 kernel: [] worker_thread+0x116/0x4b0 kernel: [] kthread+0xd2/0xf0 kernel: [] ret_from_fork+0x3f/0x70 These can be traced back to the fact that e1000e_systim_reset() skips the timecounter_init() call if e1000e_get_base_timinca() returns -EINVAL, which leads to a null deref in timecounter_read(). Commit 83129b37ef35 ("e1000e: fix systim issues", v4.2-rc1) reworked e1000e_get_base_timinca() in such a way that it can return -EINVAL for e1000_pch_spt if the SYSCFI bit is not set in TSYNCRXCTL. Some experimentation has shown that on I219 (e1000_pch_spt, "MAC: 12") adapters, the E1000_TSYNCRXCTL_SYSCFI flag is unstable; TSYNCRXCTL reads sometimes don't have the SYSCFI bit set. Retrying the read shortly after finds the bit to be set. This was observed at boot (probe) but also link up and link down. Moreover, the phc (PTP Hardware Clock) seems to operate normally even after reads where SYSCFI=0. Therefore, remove this register read and unconditionally set the clock parameters. Reported-by: Achim Mildenberger Message-Id: <20180425065243.g5mqewg5irkwgwgv@f2> Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1075876 Fixes: 83129b37ef35 ("e1000e: fix systim issues") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/netdev.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7a226537877b..6265ce8915b6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3558,15 +3558,12 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) } break; case e1000_pch_spt: - if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { - /* Stable 24MHz frequency */ - incperiod = INCPERIOD_24MHZ; - incvalue = INCVALUE_24MHZ; - shift = INCVALUE_SHIFT_24MHZ; - adapter->cc.shift = shift; - break; - } - return -EINVAL; + /* Stable 24MHz frequency */ + incperiod = INCPERIOD_24MHZ; + incvalue = INCVALUE_24MHZ; + shift = INCVALUE_SHIFT_24MHZ; + adapter->cc.shift = shift; + break; case e1000_pch_cnp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ -- GitLab From e27dad1eb1ac7bedb5a033ac2e068543742c807b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 1 Jun 2018 11:31:44 -0700 Subject: [PATCH 0403/1291] infiniband: fix a possible use-after-free bug [ Upstream commit cb2595c1393b4a5211534e6f0a0fbad369e21ad8 ] ucma_process_join() will free the new allocated "mc" struct, if there is any error after that, especially the copy_to_user(). But in parallel, ucma_leave_multicast() could find this "mc" through idr_find() before ucma_process_join() frees it, since it is already published. So "mc" could be used in ucma_leave_multicast() after it is been allocated and freed in ucma_process_join(), since we don't refcnt it. Fix this by separating "publish" from ID allocation, so that we can get an ID first and publish it later after copy_to_user(). Fixes: c8f6a362bf3e ("RDMA/cma: Add multicast communication support") Reported-by: Noam Rathaus Signed-off-by: Cong Wang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index e47baf0950e3..a22b992cde38 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -218,7 +218,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) return NULL; mutex_lock(&mut); - mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (mc->id < 0) goto error; @@ -1404,6 +1404,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err3; } + mutex_lock(&mut); + idr_replace(&multicast_idr, mc, mc->id); + mutex_unlock(&mut); + mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; -- GitLab From 47b3561450178b538b99ba26a4989e0f846f2e8e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 May 2018 15:01:16 +0200 Subject: [PATCH 0404/1291] powerpc/lib: Adjust .balign inside string functions for PPC32 [ Upstream commit 1128bb7813a896bd608fb622eee3c26aaf33b473 ] commit 87a156fb18fe1 ("Align hot loops of some string functions") degraded the performance of string functions by adding useless nops A simple benchmark on an 8xx calling 100000x a memchr() that matches the first byte runs in 41668 TB ticks before this patch and in 35986 TB ticks after this patch. So this gives an improvement of approx 10% Another benchmark doing the same with a memchr() matching the 128th byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks after this patch, so regardless on the number of loops, removing those useless nops improves the test by 5683 TB ticks. Fixes: 87a156fb18fe1 ("Align hot loops of some string functions") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cache.h | 3 +++ arch/powerpc/lib/string.S | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index c1d257aa4c2d..66298461b640 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -9,11 +9,14 @@ #if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 +#define IFETCH_ALIGN_SHIFT 2 #elif defined(CONFIG_PPC_E500MC) #define L1_CACHE_SHIFT 6 #define MAX_COPY_PREFETCH 4 +#define IFETCH_ALIGN_SHIFT 3 #elif defined(CONFIG_PPC32) #define MAX_COPY_PREFETCH 4 +#define IFETCH_ALIGN_SHIFT 3 /* 603 fetches 2 insn at a time */ #if defined(CONFIG_PPC_47x) #define L1_CACHE_SHIFT 7 #else diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index a787776822d8..0378def28d41 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -12,6 +12,7 @@ #include #include #include +#include .text @@ -23,7 +24,7 @@ _GLOBAL(strncpy) mtctr r5 addi r6,r3,-1 addi r4,r4,-1 - .balign 16 + .balign IFETCH_ALIGN_BYTES 1: lbzu r0,1(r4) cmpwi 0,r0,0 stbu r0,1(r6) @@ -43,7 +44,7 @@ _GLOBAL(strncmp) mtctr r5 addi r5,r3,-1 addi r4,r4,-1 - .balign 16 + .balign IFETCH_ALIGN_BYTES 1: lbzu r3,1(r5) cmpwi 1,r3,0 lbzu r0,1(r4) @@ -77,7 +78,7 @@ _GLOBAL(memchr) beq- 2f mtctr r5 addi r3,r3,-1 - .balign 16 + .balign IFETCH_ALIGN_BYTES 1: lbzu r0,1(r3) cmpw 0,r0,r4 bdnzf 2,1b -- GitLab From 04cda3ac3312f16ef8bbffa06c55dd1b04d3aad6 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 24 Apr 2018 14:15:54 +1000 Subject: [PATCH 0405/1291] powerpc/64s: Add barrier_nospec [ Upstream commit a6b3964ad71a61bb7c61d80a60bea7d42187b2eb ] A no-op form of ori (or immediate of 0 into r31 and the result stored in r31) has been re-tasked as a speculation barrier. The instruction only acts as a barrier on newer machines with appropriate firmware support. On older CPUs it remains a harmless no-op. Implement barrier_nospec using this instruction. mpe: The semantics of the instruction are believed to be that it prevents execution of subsequent instructions until preceding branches have been fully resolved and are no longer executing speculatively. There is no further documentation available at this time. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/barrier.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index c7c63959ba91..e582d2c88092 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -76,6 +76,21 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * Prevent execution of subsequent instructions until preceding branches have + * been fully resolved and are no longer executing speculatively. + */ +#define barrier_nospec_asm ori 31,31,0 + +// This also acts as a compiler barrier due to the memory clobber. +#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") + +#else /* !CONFIG_PPC_BOOK3S_64 */ +#define barrier_nospec_asm +#define barrier_nospec() +#endif + #include #endif /* _ASM_POWERPC_BARRIER_H */ -- GitLab From bc0b4615cc975db67ace25e9e8cc7f7dcf207d6b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:30 +1000 Subject: [PATCH 0406/1291] powerpc/eeh: Fix use-after-release of EEH driver [ Upstream commit 46d4be41b987a6b2d25a2ebdd94cafb44e21d6c5 ] Correct two cases where eeh_pcid_get() is used to reference the driver's module but the reference is dropped before the driver pointer is used. In eeh_rmv_device() also refactor a little so that only two calls to eeh_pcid_put() are needed, rather than three and the reference isn't taken at all if it wasn't needed. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh_driver.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ca2243df9cb2..470284f9e4f6 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -450,9 +450,11 @@ static void *eeh_add_virt_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { - eeh_pcid_put(dev); - if (driver->err_handler) + if (driver->err_handler) { + eeh_pcid_put(dev); return NULL; + } + eeh_pcid_put(dev); } #ifdef CONFIG_PPC_POWERNV @@ -489,17 +491,19 @@ static void *eeh_rmv_device(void *data, void *userdata) if (eeh_dev_removed(edev)) return NULL; - driver = eeh_pcid_get(dev); - if (driver) { - eeh_pcid_put(dev); - if (removed && - eeh_pe_passed(edev->pe)) - return NULL; - if (removed && - driver->err_handler && - driver->err_handler->error_detected && - driver->err_handler->slot_reset) + if (removed) { + if (eeh_pe_passed(edev->pe)) return NULL; + driver = eeh_pcid_get(dev); + if (driver) { + if (driver->err_handler && + driver->err_handler->error_detected && + driver->err_handler->slot_reset) { + eeh_pcid_put(dev); + return NULL; + } + eeh_pcid_put(dev); + } } /* Remove it from PCI subsystem */ -- GitLab From 0aceed2d7ec10eae4bab4912cbae11e47c8ceaec Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Thu, 29 Mar 2018 17:02:46 +1100 Subject: [PATCH 0407/1291] hvc_opal: don't set tb_ticks_per_usec in udbg_init_opal_common() [ Upstream commit 447808bf500a7cc92173266a59f8a494e132b122 ] time_init() will set up tb_ticks_per_usec based on reality. time_init() is called *after* udbg_init_opal_common() during boot. from arch/powerpc/kernel/time.c: unsigned long tb_ticks_per_usec = 100; /* sane default */ Currently, all powernv systems have a timebase frequency of 512mhz (512000000/1000000 == 0x200) - although there's nothing written down anywhere that I can find saying that we couldn't make that different based on the requirements in the ISA. So, we've been (accidentally) thwacking the (currently) correct (for powernv at least) value for tb_ticks_per_usec earlier than we otherwise would have. The "sane default" seems to be adequate for our purposes between udbg_init_opal_common() and time_init() being called, and if it isn't, then we should probably be setting it somewhere that isn't hvc_opal.c! Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_opal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 16331a90c1e8..9da8474fe50a 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -332,7 +332,6 @@ static void udbg_init_opal_common(void) udbg_putc = udbg_opal_putc; udbg_getc = udbg_opal_getc; udbg_getc_poll = udbg_opal_getc_poll; - tb_ticks_per_usec = 0x200; /* Make udelay not suck */ } void __init hvc_opal_init_early(void) -- GitLab From a23e1da9773bcf90b0caac3c3a7fc06fea351181 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 30 May 2018 20:31:22 +1000 Subject: [PATCH 0408/1291] powerpc/64s: Fix compiler store ordering to SLB shadow area [ Upstream commit 926bc2f100c24d4842b3064b5af44ae964c1d81c ] The stores to update the SLB shadow area must be made as they appear in the C code, so that the hypervisor does not see an entry with mismatched vsid and esid. Use WRITE_ONCE for this. GCC has been observed to elide the first store to esid in the update, which means that if the hypervisor interrupts the guest after storing to vsid, it could see an entry with old esid and new vsid, which may possibly result in memory corruption. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/slb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 13cfe413b40d..6d9bf014b3e7 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -62,14 +62,14 @@ static inline void slb_shadow_update(unsigned long ea, int ssize, * updating it. No write barriers are needed here, provided * we only update the current CPU's SLB shadow buffer. */ - p->save_area[index].esid = 0; - p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags)); - p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index)); + WRITE_ONCE(p->save_area[index].esid, 0); + WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags))); + WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index))); } static inline void slb_shadow_clear(enum slb_index index) { - get_slb_shadow()->save_area[index].esid = 0; + WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0); } static inline void create_shadowed_slbe(unsigned long ea, int ssize, -- GitLab From 929e1a3906e15ca66f2e750a6000ecc30e390edd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 May 2018 14:56:19 +0300 Subject: [PATCH 0409/1291] RDMA/mad: Convert BUG_ONs to error flows [ Upstream commit 2468b82d69e3a53d024f28d79ba0fdb8bf43dfbf ] Let's perform checks in-place instead of BUG_ONs. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d8efdc191c27..55252079faf6 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1558,7 +1558,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(!*method); + if (!*method) + goto error3; goto check_in_use; } } @@ -1568,10 +1569,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(*method); /* Allocate method table for this OUI */ - if ((ret = allocate_method_table(method))) - goto error3; + if (!*method) { + ret = allocate_method_table(method); + if (ret) + goto error3; + } memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; -- GitLab From 4f5fd8a1ae16ccbb851bc2ca9243319c67f04955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 1 Jun 2018 15:04:19 +0200 Subject: [PATCH 0410/1291] lightnvm: pblk: warn in case of corrupted write buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e37d07983af9068de0303054542d2652ca917f58 ] When cleaning up buffer entries as we wrap up, their state should be "completed". If any of the entries is in "submitted" state, it means that something bad has happened. Trigger a warning immediately instead of waiting for the state flag to eventually be updated, thus hiding the issue. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/lightnvm/pblk-rb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 9bc32578a766..c0dd17a82170 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; -try: flags = READ_ONCE(w_ctx->flags); - if (!(flags & PBLK_SUBMITTED_ENTRY)) - goto try; + WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY), + "pblk: overwriting unsubmitted data\n"); /* Release flags on context. Protect from writes and reads */ smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); -- GitLab From 20fc8b34c1955d0a8dd2e0bca49d287b65f3f9a8 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 31 May 2018 19:53:33 +0300 Subject: [PATCH 0411/1291] netfilter: nf_tables: check msg_type before nft_trans_set(trans) [ Upstream commit 9c7f96fd77b0dbe1fe7ed1f9c462c45dc48a1076 ] The patch moves the "trans->msg_type == NFT_MSG_NEWSET" check before using nft_trans_set(trans). Otherwise we can get out of bounds read. For example, KASAN reported the one when running 0001_cache_handling_0 nft test. In this case "trans->msg_type" was NFT_MSG_NEWTABLE: [75517.177808] BUG: KASAN: slab-out-of-bounds in nft_set_lookup_global+0x22f/0x270 [nf_tables] [75517.279094] Read of size 8 at addr ffff881bdb643fc8 by task nft/7356 ... [75517.375605] CPU: 26 PID: 7356 Comm: nft Tainted: G E 4.17.0-rc7.1.x86_64 #1 [75517.489587] Hardware name: Oracle Corporation SUN SERVER X4-2 [75517.618129] Call Trace: [75517.648821] dump_stack+0xd1/0x13b [75517.691040] ? show_regs_print_info+0x5/0x5 [75517.742519] ? kmsg_dump_rewind_nolock+0xf5/0xf5 [75517.799300] ? lock_acquire+0x143/0x310 [75517.846738] print_address_description+0x85/0x3a0 [75517.904547] kasan_report+0x18d/0x4b0 [75517.949892] ? nft_set_lookup_global+0x22f/0x270 [nf_tables] [75518.019153] ? nft_set_lookup_global+0x22f/0x270 [nf_tables] [75518.088420] ? nft_set_lookup_global+0x22f/0x270 [nf_tables] [75518.157689] nft_set_lookup_global+0x22f/0x270 [nf_tables] [75518.224869] nf_tables_newsetelem+0x1a5/0x5d0 [nf_tables] [75518.291024] ? nft_add_set_elem+0x2280/0x2280 [nf_tables] [75518.357154] ? nla_parse+0x1a5/0x300 [75518.401455] ? kasan_kmalloc+0xa6/0xd0 [75518.447842] nfnetlink_rcv+0xc43/0x1bdf [nfnetlink] [75518.507743] ? nfnetlink_rcv+0x7a5/0x1bdf [nfnetlink] [75518.569745] ? nfnl_err_reset+0x3c0/0x3c0 [nfnetlink] [75518.631711] ? lock_acquire+0x143/0x310 [75518.679133] ? netlink_deliver_tap+0x9b/0x1070 [75518.733840] ? kasan_unpoison_shadow+0x31/0x40 [75518.788542] netlink_unicast+0x45d/0x680 [75518.837111] ? __isolate_free_page+0x890/0x890 [75518.891913] ? netlink_attachskb+0x6b0/0x6b0 [75518.944542] netlink_sendmsg+0x6fa/0xd30 [75518.993107] ? netlink_unicast+0x680/0x680 [75519.043758] ? netlink_unicast+0x680/0x680 [75519.094402] sock_sendmsg+0xd9/0x160 [75519.138810] ___sys_sendmsg+0x64d/0x980 [75519.186234] ? copy_msghdr_from_user+0x350/0x350 [75519.243118] ? lock_downgrade+0x650/0x650 [75519.292738] ? do_raw_spin_unlock+0x5d/0x250 [75519.345456] ? _raw_spin_unlock+0x24/0x30 [75519.395065] ? __handle_mm_fault+0xbde/0x3410 [75519.448830] ? sock_setsockopt+0x3d2/0x1940 [75519.500516] ? __lock_acquire.isra.25+0xdc/0x19d0 [75519.558448] ? lock_downgrade+0x650/0x650 [75519.608057] ? __audit_syscall_entry+0x317/0x720 [75519.664960] ? __fget_light+0x58/0x250 [75519.711325] ? __sys_sendmsg+0xde/0x170 [75519.758850] __sys_sendmsg+0xde/0x170 [75519.804193] ? __ia32_sys_shutdown+0x90/0x90 [75519.856725] ? syscall_trace_enter+0x897/0x10e0 [75519.912354] ? trace_event_raw_event_sys_enter+0x920/0x920 [75519.979432] ? __audit_syscall_entry+0x720/0x720 [75520.036118] do_syscall_64+0xa3/0x3d0 [75520.081248] ? prepare_exit_to_usermode+0x47/0x1d0 [75520.139904] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [75520.201680] RIP: 0033:0x7fc153320ba0 [75520.245772] RSP: 002b:00007ffe294c3638 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [75520.337708] RAX: ffffffffffffffda RBX: 00007ffe294c4820 RCX: 00007fc153320ba0 [75520.424547] RDX: 0000000000000000 RSI: 00007ffe294c46b0 RDI: 0000000000000003 [75520.511386] RBP: 00007ffe294c47b0 R08: 0000000000000004 R09: 0000000002114090 [75520.598225] R10: 00007ffe294c30a0 R11: 0000000000000246 R12: 00007ffe294c3660 [75520.684961] R13: 0000000000000001 R14: 00007ffe294c3650 R15: 0000000000000001 [75520.790946] Allocated by task 7356: [75520.833994] kasan_kmalloc+0xa6/0xd0 [75520.878088] __kmalloc+0x189/0x450 [75520.920107] nft_trans_alloc_gfp+0x20/0x190 [nf_tables] [75520.983961] nf_tables_newtable+0xcd0/0x1bd0 [nf_tables] [75521.048857] nfnetlink_rcv+0xc43/0x1bdf [nfnetlink] [75521.108655] netlink_unicast+0x45d/0x680 [75521.157013] netlink_sendmsg+0x6fa/0xd30 [75521.205271] sock_sendmsg+0xd9/0x160 [75521.249365] ___sys_sendmsg+0x64d/0x980 [75521.296686] __sys_sendmsg+0xde/0x170 [75521.341822] do_syscall_64+0xa3/0x3d0 [75521.386957] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [75521.467867] Freed by task 23454: [75521.507804] __kasan_slab_free+0x132/0x180 [75521.558137] kfree+0x14d/0x4d0 [75521.596005] free_rt_sched_group+0x153/0x280 [75521.648410] sched_autogroup_create_attach+0x19a/0x520 [75521.711330] ksys_setsid+0x2ba/0x400 [75521.755529] __ia32_sys_setsid+0xa/0x10 [75521.802850] do_syscall_64+0xa3/0x3d0 [75521.848090] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [75521.929000] The buggy address belongs to the object at ffff881bdb643f80 which belongs to the cache kmalloc-96 of size 96 [75522.079797] The buggy address is located 72 bytes inside of 96-byte region [ffff881bdb643f80, ffff881bdb643fe0) [75522.221234] The buggy address belongs to the page: [75522.280100] page:ffffea006f6d90c0 count:1 mapcount:0 mapping:0000000000000000 index:0x0 [75522.377443] flags: 0x2fffff80000100(slab) [75522.426956] raw: 002fffff80000100 0000000000000000 0000000000000000 0000000180200020 [75522.521275] raw: ffffea006e6fafc0 0000000c0000000c ffff881bf180f400 0000000000000000 [75522.615601] page dumped because: kasan: bad access detected Fixes: 37a9cc525525 ("netfilter: nf_tables: add generation mask to sets") Signed-off-by: Alexey Kodanev Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 85b549e84104..9a945024a0b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2710,12 +2710,13 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, u32 id = ntohl(nla_get_be32(nla)); list_for_each_entry(trans, &net->nft.commit_list, list) { - struct nft_set *set = nft_trans_set(trans); + if (trans->msg_type == NFT_MSG_NEWSET) { + struct nft_set *set = nft_trans_set(trans); - if (trans->msg_type == NFT_MSG_NEWSET && - id == nft_trans_set_id(trans) && - nft_active_genmask(set, genmask)) - return set; + if (id == nft_trans_set_id(trans) && + nft_active_genmask(set, genmask)) + return set; + } } return ERR_PTR(-ENOENT); } -- GitLab From 1339e2b8eaea3db8996eeeb6dafd2a0c600ee843 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 May 2018 11:17:16 -0400 Subject: [PATCH 0412/1291] pnfs: Don't release the sequence slot until we've processed layoutget on open [ Upstream commit ae55e59da0e401893b3c52b575fc18a00623d0a1 ] If the server recalls the layout that was just handed out, we risk hitting a race as described in RFC5661 Section 2.10.6.3 unless we ensure that we release the sequence slot after processing the LAYOUTGET operation that was sent as part of the OPEN compound. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index df0455cf03af..43fbf4495090 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2695,7 +2695,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - state = nfs4_opendata_to_nfs4_state(opendata); + state = _nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); if (IS_ERR(state)) goto out; @@ -2731,6 +2731,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, nfs4_schedule_stateid_recovery(server, state); } out: + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } -- GitLab From 122031c292023f520befba701eb9895c3e0fc48c Mon Sep 17 00:00:00 2001 From: Anatoly Pugachev Date: Mon, 28 May 2018 02:06:37 +0300 Subject: [PATCH 0413/1291] disable loading f2fs module on PAGE_SIZE > 4KB [ Upstream commit 4071e67cffcc5c2a007116a02437471351f550eb ] The following patch disables loading of f2fs module on architectures which have PAGE_SIZE > 4096 , since it is impossible to mount f2fs on such architectures , log messages are: mount: /mnt: wrong fs type, bad option, bad superblock on /dev/vdiskb1, missing codepage or helper program, or other error. /dev/vdiskb1: F2FS filesystem, UUID=1d8b9ca4-2389-4910-af3b-10998969f09c, volume name "" May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS filesystem in 1th superblock May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS filesystem in 2th superblock May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB which was introduced by git commit 5c9b469295fb6b10d98923eab5e79c4edb80ed20 tested on git kernel 4.17.0-rc6-00309-gec30dcf7f425 with patch applied: modprobe: ERROR: could not insert 'f2fs': Invalid argument May 28 01:40:28 v215 kernel: F2FS not supported on PAGE_SIZE(8192) != 4096 Signed-off-by: Anatoly Pugachev Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 933c3d529e65..400c00058bad 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2663,6 +2663,12 @@ static int __init init_f2fs_fs(void) { int err; + if (PAGE_SIZE != F2FS_BLKSIZE) { + printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n", + PAGE_SIZE, F2FS_BLKSIZE); + return -EINVAL; + } + f2fs_build_trace_ios(); err = init_inodecache(); -- GitLab From f3f029197738730a7b74d1b2d25288586975df47 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 28 May 2018 16:59:27 +0800 Subject: [PATCH 0414/1291] f2fs: fix error path of move_data_page [ Upstream commit 14a28559f43ac7c0b98dd1b0e73ec9ec8ab4fc45 ] This patch fixes error path of move_data_page: - clear cold data flag if it fails to write page. - redirty page for non-ENOMEM case. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/gc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e5673a9b2619..f2f897cd23c9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -768,9 +768,14 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, set_cold_data(page); err = do_write_data_page(&fio); - if (err == -ENOMEM && is_dirty) { - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; + if (err) { + clear_cold_data(page); + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (is_dirty) + set_page_dirty(page); } } out: -- GitLab From de13b2ac741f52000f6de185a78cc844b7565fe5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 May 2018 18:03:34 +0800 Subject: [PATCH 0415/1291] f2fs: fix to don't trigger writeback during recovery [ Upstream commit 64c74a7ab505ea40d1b3e5d02735ecab08ae1b14 ] - f2fs_fill_super - recover_fsync_data - recover_data - del_fsync_inode - iput - iput_final - write_inode_now - f2fs_write_inode - f2fs_balance_fs - f2fs_balance_fs_bg - sync_dirty_inodes With data_flush mount option, during recovery, in order to avoid entering above writeback flow, let's detect recovery status and do skip in f2fs_balance_fs_bg. Signed-off-by: Chao Yu Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 271516db8939..2793b65a1c1c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -435,6 +435,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return; + /* try to shrink extent cache when there is no enough memory */ if (!available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); -- GitLab From 4f979af7b0ad6d88723bf1ec117b6e44dca38148 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Apr 2018 10:36:13 +0800 Subject: [PATCH 0416/1291] f2fs: fix to wait page writeback during revoking atomic write [ Upstream commit e5e5732d8120654159254c16834bc8663d8be124 ] After revoking atomic write, related LBA can be reused by others, so we need to wait page writeback before reusing the LBA, in order to avoid interference between old atomic written in-flight IO and new IO. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2793b65a1c1c..7c05bd4222b2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -225,6 +225,8 @@ static int __revoke_inmem_pages(struct inode *inode, lock_page(page); + f2fs_wait_on_page_writeback(page, DATA, true); + if (recover) { struct dnode_of_data dn; struct node_info ni; -- GitLab From c92d09e35d2ddf8ec3e3e3c755241cfde670236c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 18 May 2018 11:51:52 +0530 Subject: [PATCH 0417/1291] f2fs: Fix deadlock in shutdown ioctl [ Upstream commit 60b2b4ee2bc01dd052f99fa9d65da2232102ef8e ] f2fs_ioc_shutdown() ioctl gets stuck in the below path when issued with F2FS_GOING_DOWN_FULLSYNC option. __switch_to+0x90/0xc4 percpu_down_write+0x8c/0xc0 freeze_super+0xec/0x1e4 freeze_bdev+0xc4/0xcc f2fs_ioctl+0xc0c/0x1ce0 f2fs_compat_ioctl+0x98/0x1f0 Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 72c6a9e9a9b4..97537ce5127c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1808,9 +1808,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (get_user(in, (__u32 __user *)arg)) return -EFAULT; - ret = mnt_want_write_file(filp); - if (ret) - return ret; + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) + return ret; + } switch (in) { case F2FS_GOING_DOWN_FULLSYNC: @@ -1838,7 +1840,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } f2fs_update_time(sbi, REQ_TIME); out: - mnt_drop_write_file(filp); + if (in != F2FS_GOING_DOWN_FULLSYNC) + mnt_drop_write_file(filp); return ret; } -- GitLab From ad8d61efc9b7b158095fac6a7f764d6897ef7952 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Apr 2018 17:53:52 +0800 Subject: [PATCH 0418/1291] f2fs: fix to detect failure of dquot_initialize [ Upstream commit c22aecd75919511abea872b201751e0be1add898 ] dquot_initialize() can fail due to any exception inside quota subsystem, f2fs needs to be aware of it, and return correct return value to caller. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 97537ce5127c..1b361afcbb67 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2493,7 +2493,9 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) } f2fs_put_page(ipage, 1); - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + goto out_unlock; transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); if (!IS_ERR(transfer_to[PRJQUOTA])) { -- GitLab From 67226fb52c23e0df7d1e9f50fd29f880eb08ae69 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Apr 2018 17:51:28 +0800 Subject: [PATCH 0419/1291] f2fs: fix race in between GC and atomic open [ Upstream commit 27319ba4044c0c67d62ae39e53c0118c89f0a029 ] Thread GC thread - f2fs_ioc_start_atomic_write - get_dirty_pages - filemap_write_and_wait_range - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - f2fs_is_atomic_file - set_page_dirty - set_inode_flag(, FI_ATOMIC_FILE) Dirty data page can still be generated by GC in race condition as above call stack. This patch adds fi->dio_rwsem[WRITE] in f2fs_ioc_start_atomic_write to avoid such race. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1b361afcbb67..87e654c53c31 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1630,6 +1630,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_atomic_file(inode)) goto out; @@ -1659,6 +1661,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; -- GitLab From 63019044fb8923974ba735a6b580a8ba16219471 Mon Sep 17 00:00:00 2001 From: Filippo Muzzini Date: Thu, 31 May 2018 15:23:11 +0200 Subject: [PATCH 0420/1291] block, bfq: remove wrong lock in bfq_requests_merged [ Upstream commit a12bffebc0c9d6a5851f062aaea3aa7c4adc6042 ] In bfq_requests_merged(), there is a deadlock because the lock on bfqq->bfqd->lock is held by the calling function, but the code of this function tries to grab the lock again. This deadlock is currently hidden by another bug (fixed by next commit for this source file), which causes the body of bfq_requests_merged() to be never executed. This commit removes the deadlock by removing the lock/unlock pair. Signed-off-by: Filippo Muzzini Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/bfq-iosched.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 56c9cd01fd1d..4a4b7d3c909a 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1678,7 +1678,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, if (!RB_EMPTY_NODE(&rq->rb_node)) goto end; - spin_lock_irq(&bfqq->bfqd->lock); /* * If next and rq belong to the same bfq_queue and next is older @@ -1702,7 +1701,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, bfq_remove_request(q, next); - spin_unlock_irq(&bfqq->bfqd->lock); end: bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); } -- GitLab From bc30588b8e082f48882f713c19369bc9ae4c5509 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 29 May 2018 16:13:03 -0600 Subject: [PATCH 0421/1291] usbip: usbip_detach: Fix memory, udev context and udev leak [ Upstream commit d179f99a651685b19333360e6558110da2fe9bd7 ] detach_port() fails to call usbip_vhci_driver_close() from its error path after usbip_vhci_detach_device() returns failure, leaking memory allocated in usbip_vhci_driver_open() and holding udev_context and udev references. Fix it to call usbip_vhci_driver_close(). Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_detach.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c index 9db9d21bb2ec..6a8db858caa5 100644 --- a/tools/usb/usbip/src/usbip_detach.c +++ b/tools/usb/usbip/src/usbip_detach.c @@ -43,7 +43,7 @@ void usbip_detach_usage(void) static int detach_port(char *port) { - int ret; + int ret = 0; uint8_t portnum; char path[PATH_MAX+1]; @@ -73,9 +73,12 @@ static int detach_port(char *port) } ret = usbip_vhci_detach_device(portnum); - if (ret < 0) - return -1; + if (ret < 0) { + ret = -1; + goto call_driver_close; + } +call_driver_close: usbip_vhci_driver_close(); return ret; -- GitLab From 38c8c0a9709d2159fd989b0061b0b73364427b64 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 25 May 2018 16:23:46 +0200 Subject: [PATCH 0422/1291] usbip: dynamically allocate idev by nports found in sysfs [ Upstream commit de19ca6fd72c7dd45ad82403e7b3fe9c74ef6767 ] As the amount of available ports varies by the kernels build configuration. To remove the limitation of the fixed 128 ports we allocate the amount of idevs by using the number we get from the kernel. Signed-off-by: Michael Grzeschik Acked-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/vhci_driver.c | 32 +++++++++++++++++----------- tools/usb/usbip/libsrc/vhci_driver.h | 3 +-- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index d1fc0f9f00fb..ed8c9d360c0f 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -135,11 +135,11 @@ static int refresh_imported_device_list(void) return 0; } -static int get_nports(void) +static int get_nports(struct udev_device *hc_device) { const char *attr_nports; - attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports"); + attr_nports = udev_device_get_sysattr_value(hc_device, "nports"); if (!attr_nports) { err("udev_device_get_sysattr_value nports failed"); return -1; @@ -242,35 +242,41 @@ static int read_record(int rhport, char *host, unsigned long host_len, int usbip_vhci_driver_open(void) { + int nports; + struct udev_device *hc_device; + udev_context = udev_new(); if (!udev_context) { err("udev_new failed"); return -1; } - vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver)); - /* will be freed in usbip_driver_close() */ - vhci_driver->hc_device = + hc_device = udev_device_new_from_subsystem_sysname(udev_context, USBIP_VHCI_BUS_TYPE, USBIP_VHCI_DEVICE_NAME); - if (!vhci_driver->hc_device) { + if (!hc_device) { err("udev_device_new_from_subsystem_sysname failed"); goto err; } - vhci_driver->nports = get_nports(); - dbg("available ports: %d", vhci_driver->nports); - - if (vhci_driver->nports <= 0) { + nports = get_nports(hc_device); + if (nports <= 0) { err("no available ports"); goto err; - } else if (vhci_driver->nports > MAXNPORT) { - err("port number exceeds %d", MAXNPORT); + } + dbg("available ports: %d", nports); + + vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) + + nports * sizeof(struct usbip_imported_device)); + if (!vhci_driver) { + err("vhci_driver allocation failed"); goto err; } + vhci_driver->nports = nports; + vhci_driver->hc_device = hc_device; vhci_driver->ncontrollers = get_ncontrollers(); dbg("available controllers: %d", vhci_driver->ncontrollers); @@ -285,7 +291,7 @@ int usbip_vhci_driver_open(void) return 0; err: - udev_device_unref(vhci_driver->hc_device); + udev_device_unref(hc_device); if (vhci_driver) free(vhci_driver); diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h index 418b404d5121..6c9aca216705 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.h +++ b/tools/usb/usbip/libsrc/vhci_driver.h @@ -13,7 +13,6 @@ #define USBIP_VHCI_BUS_TYPE "platform" #define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0" -#define MAXNPORT 128 enum hub_speed { HUB_SPEED_HIGH = 0, @@ -41,7 +40,7 @@ struct usbip_vhci_driver { int ncontrollers; int nports; - struct usbip_imported_device idev[MAXNPORT]; + struct usbip_imported_device idev[]; }; -- GitLab From 71b1bf6e97534f11dc268d4c9b7e5a9b9235e662 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 3 May 2018 11:25:08 -0700 Subject: [PATCH 0423/1291] perf/x86/intel/uncore: Correct fixed counter index check in generic code [ Upstream commit 4749f8196452eeb73cf2086a6a9705bae479d33d ] There is no index which is bigger than UNCORE_PMC_IDX_FIXED. The only exception is client IMC uncore, which has been specially handled. For generic code, it is not correct to use >= to check fixed counter. The code quality issue will bring problem when a new counter index is introduced. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: acme@kernel.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1525371913-10597-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d45e06346f14..c56cb37b88e3 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -218,7 +218,7 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e u64 prev_count, new_count, delta; int shift; - if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) + if (event->hw.idx == UNCORE_PMC_IDX_FIXED) shift = 64 - uncore_fixed_ctr_bits(box); else shift = 64 - uncore_perf_ctr_bits(box); -- GitLab From 596a9bfe8190ccd8ea9d9b6fa16390a575b02d32 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 3 May 2018 11:25:07 -0700 Subject: [PATCH 0424/1291] perf/x86/intel/uncore: Correct fixed counter index check for NHM [ Upstream commit d71f11c076c420c4e2fceb4faefa144e055e0935 ] For Nehalem and Westmere, there is only one fixed counter for W-Box. There is no index which is bigger than UNCORE_PMC_IDX_FIXED. It is not correct to use >= to check fixed counter. The code quality issue will bring problem when new counter index is introduced. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: acme@kernel.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1525371913-10597-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore_nhmex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 93e7a8397cde..173e2674be6e 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -246,7 +246,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p { struct hw_perf_event *hwc = &event->hw; - if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + if (hwc->idx == UNCORE_PMC_IDX_FIXED) wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); -- GitLab From 9c8f3af662827d596ecde90c6c7ce069c0e404eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Tue, 10 Apr 2018 17:11:15 -0500 Subject: [PATCH 0425/1291] selftests/intel_pstate: Improve test, minor fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e9d33f149f52981fd856a0b16aa8ebda89b02e34 ] A few changes improve the overall usability of the test: * fix a hard-coded maximum frequency (3300), * don't adjust the CPU frequency if only evaluating results, * fix a comparison for multiple frequencies. A symptom of that last issue looked like this: ./run.sh: line 107: [: too many arguments ./run.sh: line 110: 3099 3099 3100-3100: syntax error in expression (error token is \"3099 3100-3100\") Because a check will count how many differente frequencies there are among the CPUs of the system, and after they are tallied another read is performed, which might produce different results. Signed-off-by: Daniel Díaz Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/intel_pstate/run.sh | 24 +++++++++------------ 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index c670359becc6..bde31a0be5ca 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -48,11 +48,12 @@ function run_test () { echo "sleeping for 5 seconds" sleep 5 - num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l) - if [ $num_freqs -le 2 ]; then - cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1 + grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs + num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ') + if [ $num_freqs -ge 2 ]; then + tail -n 1 /tmp/result.freqs > /tmp/result.$1 else - cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1 + cp /tmp/result.freqs /tmp/result.$1 fi ./msr 0 >> /tmp/result.$1 @@ -82,21 +83,20 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') max_freq=$(($_max_freq / 1000)) -for freq in `seq $max_freq -100 $min_freq` +[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq` do echo "Setting maximum frequency to $freq" cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null - [ $EVALUATE_ONLY -eq 0 ] && run_test $freq + run_test $freq done -echo "==============================================================================" +[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null +echo "==============================================================================" echo "The marketing frequency of the cpu is $mkt_freq MHz" echo "The maximum frequency of the cpu is $max_freq MHz" echo "The minimum frequency of the cpu is $min_freq MHz" -cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null - # make a pretty table echo "Target Actual Difference MSR(0x199) max_perf_pct" for freq in `seq $max_freq -100 $min_freq` @@ -104,10 +104,6 @@ do result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ') msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ') max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' ) - if [ $result_freq -eq $freq ]; then - echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))" - else - echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" - fi + echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" done exit 0 -- GitLab From 4312c2df0b7b77e45a8727fba5cf9a36729e07bc Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Fri, 4 May 2018 13:33:37 -0600 Subject: [PATCH 0426/1291] selftests: memfd: return Kselftest Skip code for skipped tests [ Upstream commit b27f0259e8cea74c627327c063742a83613dd460 ] When memfd test is skipped because of unmet dependencies and/or unsupported configuration, it returns non-zero value which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Added an explicit check for root user at the start of memfd hugetlbfs test and return skip code if a non-root user attempts to run it. In addition, return skip code when not enough huge pages are available to run the test. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Mike Kravetz Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/memfd/run_tests.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh index daabb350697c..bf83db61013a 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_tests.sh @@ -1,6 +1,9 @@ #!/bin/bash # please run as root +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # # Normal tests requiring no special resources # @@ -29,12 +32,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` hpages_needed=`expr $hpages_test - $freepgs` + if [ $UID != 0 ]; then + echo "Please run memfd with hugetlbfs test as root" + exit $ksft_skip + fi + echo 3 > /proc/sys/vm/drop_caches echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then - echo "Please run this test as root" - exit 1 - fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size @@ -53,7 +57,7 @@ if [ $freepgs -lt $hpages_test ]; then fi printf "Not enough huge pages available (%d < %d)\n" \ $freepgs $needpgs - exit 1 + exit $ksft_skip fi # -- GitLab From f559bb20c1aeedb0a9efe321b3e42d97556aa970 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 3 May 2018 17:09:40 -0600 Subject: [PATCH 0427/1291] selftests: intel_pstate: return Kselftest Skip code for skipped tests [ Upstream commit 5c30a038fb8ec8cdff011e6b5d5d51eb415381d4 ] When intel_pstate test is skipped because of unmet dependencies and/or unsupported configuration, it returns 0 which is treated as a pass by the Kselftest framework. This leads to false positive result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/intel_pstate/run.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index bde31a0be5ca..928978804342 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -30,9 +30,12 @@ EVALUATE_ONLY=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then echo "$0 # Skipped: Test can only run on x86 architectures." - exit 0 + exit $ksft_skip fi max_cpus=$(($(nproc)-1)) -- GitLab From 233cba023484e3f4ff68d01204eae85ad427ec0d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 15 May 2018 11:07:01 +0200 Subject: [PATCH 0428/1291] PCI: Fix devm_pci_alloc_host_bridge() memory leak [ Upstream commit 3bbce531788719749520f28052cabdef16af6b16 ] Fix a memory leak by freeing the PCI resource list in devm_pci_release_host_bridge_dev(). Fixes: 5c3f18cce083 ("PCI: Add devm_pci_alloc_host_bridge() interface") Signed-off-by: Jan Kiszka Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f285cd74088e..4bccaf688aad 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -516,12 +516,14 @@ static void devm_pci_release_host_bridge_dev(struct device *dev) if (bridge->release_fn) bridge->release_fn(bridge); + + pci_free_resource_list(&bridge->windows); } static void pci_release_host_bridge_dev(struct device *dev) { devm_pci_release_host_bridge_dev(dev); - pci_free_host_bridge(to_pci_host_bridge(dev)); + kfree(to_pci_host_bridge(dev)); } struct pci_host_bridge *pci_alloc_host_bridge(size_t priv) -- GitLab From 59b837d5927cfa03867931aed336ac0c9fa9700c Mon Sep 17 00:00:00 2001 From: Ethan Lien Date: Mon, 28 May 2018 13:48:20 +0800 Subject: [PATCH 0429/1291] btrfs: balance dirty metadata pages in btrfs_finish_ordered_io [ Upstream commit e73e81b6d0114d4a303205a952ab2e87c44bd279 ] [Problem description and how we fix it] We should balance dirty metadata pages at the end of btrfs_finish_ordered_io, since a small, unmergeable random write can potentially produce dirty metadata which is multiple times larger than the data itself. For example, a small, unmergeable 4KiB write may produce: 16KiB dirty leaf (and possibly 16KiB dirty node) in subvolume tree 16KiB dirty leaf (and possibly 16KiB dirty node) in checksum tree 16KiB dirty leaf (and possibly 16KiB dirty node) in extent tree Although we do call balance dirty pages in write side, but in the buffered write path, most metadata are dirtied only after we reach the dirty background limit (which by far only counts dirty data pages) and wakeup the flusher thread. If there are many small, unmergeable random writes spread in a large btree, we'll find a burst of dirty pages exceeds the dirty_bytes limit after we wakeup the flusher thread - which is not what we expect. In our machine, it caused out-of-memory problem since a page cannot be dropped if it is marked dirty. Someone may worry about we may sleep in btrfs_btree_balance_dirty_nodelay, but since we do btrfs_finish_ordered_io in a separate worker, it will not stop the flusher consuming dirty pages. Also, we use different worker for metadata writeback endio, sleep in btrfs_finish_ordered_io help us throttle the size of dirty metadata pages. [Reproduce steps] To reproduce the problem, we need to do 4KiB write randomly spread in a large btree. In our 2GiB RAM machine: 1) Create 4 subvolumes. 2) Run fio on each subvolume: [global] direct=0 rw=randwrite ioengine=libaio bs=4k iodepth=16 numjobs=1 group_reporting size=128G runtime=1800 norandommap time_based randrepeat=0 3) Take snapshot on each subvolume and repeat fio on existing files. 4) Repeat step (3) until we get large btrees. In our case, by observing btrfs_root_item->bytes_used, we have 2GiB of metadata in each subvolume tree and 12GiB of metadata in extent tree. 5) Stop all fio, take snapshot again, and wait until all delayed work is completed. 6) Start all fio. Few seconds later we hit OOM when the flusher starts to work. It can be reproduced even when using nocow write. Signed-off-by: Ethan Lien Reviewed-by: David Sterba [ add comment ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f5b90dc137ec..3d339b41f5b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3162,6 +3162,9 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); + /* Try to release some metadata so we don't get an OOM but don't wait */ + btrfs_btree_balance_dirty_nodelay(fs_info); + return ret; } -- GitLab From 73425f6ad6ebaf08db227748c69dbd4bbfd09d0d Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 22 Mar 2018 14:14:45 +0200 Subject: [PATCH 0430/1291] iwlwifi: pcie: fix race in Rx buffer allocator [ Upstream commit 0f22e40053bd5378ad1e3250e65c574fd61c0cd6 ] Make sure the rx_allocator worker is canceled before running the rx_init routine. rx_init frees and re-allocates all rxb's pages. The rx_allocator worker also allocates pages for the used rxb's. Running rx_init and rx_allocator simultaniously causes a kernel panic. Fix that by canceling the work in rx_init. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index a06b6612b658..ca99c3cf41c2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -901,6 +901,8 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans) } def_rxq = trans_pcie->rxq; + cancel_work_sync(&rba->rx_alloc); + spin_lock(&rba->lock); atomic_set(&rba->req_pending, 0); atomic_set(&rba->req_ready, 0); -- GitLab From 678e64c63217eb7a88eb77613979ac9037357cf7 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 29 May 2018 18:37:16 +0200 Subject: [PATCH 0431/1291] Bluetooth: hci_qca: Fix "Sleep inside atomic section" warning [ Upstream commit 9960521c44a5d828f29636ceac0600603ecbddbf ] This patch fixes the following warning during boot: do not call blocking ops when !TASK_RUNNING; state=1 set at [<(ptrval)>] qca_setup+0x194/0x750 [hci_uart] WARNING: CPU: 2 PID: 1878 at kernel/sched/core.c:6135 __might_sleep+0x7c/0x88 In qca_set_baudrate(), the current task state is set to TASK_UNINTERRUPTIBLE before going to sleep for 300ms. It was then restored to TASK_INTERRUPTIBLE. This patch sets the current task state back to TASK_RUNNING instead. Signed-off-by: Thierry Escande Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 6f4ebd5e54c8..a6173ddfb5a7 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -881,7 +881,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(BAUDRATE_SETTLE_TIMEOUT_MS)); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_RUNNING); return 0; } -- GitLab From 5650a9be9635f574b8e5e8b38981324ffb283140 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 21 May 2018 18:09:20 +0800 Subject: [PATCH 0432/1291] Bluetooth: btusb: Add a new Realtek 8723DE ID 2ff8:b011 [ Upstream commit 66d9975c5a7c40aa7e4bb0ec0b0c37ba1f190923 ] Without this patch we cannot turn on the Bluethooth adapter on ASUS E406MA. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2ff8 ProdID=b011 Rev= 2.00 S: Manufacturer=Realtek S: Product=802.11n WLAN Adapter S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 86d7975afaeb..d89a674604b8 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -373,6 +373,9 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8723BU Bluetooth devices */ { USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8723DE Bluetooth devices */ + { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8821AE Bluetooth devices */ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK }, -- GitLab From 3d4837733945a7d9505fd08db34b4188ca59e854 Mon Sep 17 00:00:00 2001 From: Kai Chieh Chuang Date: Mon, 28 May 2018 10:18:18 +0800 Subject: [PATCH 0433/1291] ASoC: dpcm: fix BE dai not hw_free and shutdown [ Upstream commit 9c0ac70ad24d76b873c1551e27790c7f6a815d5c ] In case, one BE is used by two FE1/FE2 FE1--->BE--> | FE2----] when FE1/FE2 call dpcm_be_dai_hw_free() together the BE users will be 2 (> 1), hence cannot be hw_free the be state will leave at, ex. SND_SOC_DPCM_STATE_STOP later FE1/FE2 call dpcm_be_dai_shutdown(), will be skip due to wrong state. leaving the BE not being hw_free and shutdown. The BE dai will be hw_free later when calling dpcm_be_dai_shutdown() if still in invalid state. Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 94b88b897c3b..3d0dab8282ad 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1779,8 +1779,10 @@ int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) continue; if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) - continue; + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) { + soc_pcm_hw_free(be_substream); + be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; + } dev_dbg(be->dev, "ASoC: close BE %s\n", be->dai_link->name); -- GitLab From 6648fdc71138caf906bfa0a96b383a7d0dee270f Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 18 Apr 2018 12:23:58 +0200 Subject: [PATCH 0434/1291] mfd: cros_ec: Fail early if we cannot identify the EC [ Upstream commit 0dbbf25561b29ffab5ba6277429760abdf49ceff ] If we cannot communicate with the EC chip to detect the protocol version and its features, it's very likely useless to continue. Else we will commit all kind of uninformed mistakes (using the wrong protocol, the wrong buffer size, mixing the EC with other chips). Signed-off-by: Vincent Palatin Acked-by: Benson Leung Signed-off-by: Enric Balletbo i Serra Reviewed-by: Gwendal Grignou Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/cros_ec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index b0ca5a4c841e..c5528ae982f2 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -112,7 +112,11 @@ int cros_ec_register(struct cros_ec_device *ec_dev) mutex_init(&ec_dev->lock); - cros_ec_query_all(ec_dev); + err = cros_ec_query_all(ec_dev); + if (err) { + dev_err(dev, "Cannot identify the EC: error %d\n", err); + return err; + } if (ec_dev->irq) { err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread, -- GitLab From 852feed35dcdec8521c2f3227545ad2a187a2b3b Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Thu, 24 May 2018 19:18:27 +0530 Subject: [PATCH 0435/1291] mwifiex: handle race during mwifiex_usb_disconnect [ Upstream commit b817047ae70c0bd67b677b65d0d69d72cd6e9728 ] Race condition is observed during rmmod of mwifiex_usb: 1. The rmmod thread will call mwifiex_usb_disconnect(), download SHUTDOWN command and do wait_event_interruptible_timeout(), waiting for response. 2. The main thread will handle the response and will do a wake_up_interruptible(), unblocking rmmod thread. 3. On getting unblocked, rmmod thread will make rx_cmd.urb = NULL in mwifiex_usb_free(). 4. The main thread will try to resubmit rx_cmd.urb in mwifiex_usb_submit_rx_urb(), which is NULL. To fix, wait for main thread to complete before calling mwifiex_usb_free(). Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index f4f2b9b27e32..50890cab8807 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -644,6 +644,9 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) MWIFIEX_FUNC_SHUTDOWN); } + if (adapter->workqueue) + flush_workqueue(adapter->workqueue); + mwifiex_usb_free(card); mwifiex_dbg(adapter, FATAL, -- GitLab From 6f8688bf299b35b5d585a96fa670ea257257fbd3 Mon Sep 17 00:00:00 2001 From: Eyal Reizer Date: Mon, 28 May 2018 11:36:42 +0300 Subject: [PATCH 0436/1291] wlcore: sdio: check for valid platform device data before suspend [ Upstream commit 6e91d48371e79862ea2c05867aaebe4afe55a865 ] the wl pointer can be null In case only wlcore_sdio is probed while no WiLink module is successfully probed, as in the case of mounting a wl12xx module while using a device tree file configured with wl18xx related settings. In this case the system was crashing in wl1271_suspend() as platform device data is not set. Make sure wl the pointer is valid before using it. Signed-off-by: Eyal Reizer Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/sdio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index f8a1fea64e25..219d1a86b92e 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -406,6 +406,11 @@ static int wl1271_suspend(struct device *dev) mmc_pm_flag_t sdio_flags; int ret = 0; + if (!wl) { + dev_err(dev, "no wilink module was probed\n"); + goto out; + } + dev_dbg(dev, "wl1271 suspend. wow_enabled: %d\n", wl->wow_enabled); -- GitLab From 1a95962bf210b6d763113d595e74223341492684 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Fri, 25 May 2018 19:43:02 +0100 Subject: [PATCH 0437/1291] net: hns3: Fixes the init of the VALID BD info in the descriptor [ Upstream commit 7d0b130cbbfa4651cc1ab9268a2956c1b9d82ff9 ] RX Buffer Descriptor contains a VALID bit which indicates if the BD is valid and has some data. This field is set by HNS3 hardware to intimate the driver of some valid data present in the BD. nd should be reset by the driver when BD is being used again. In the existing code this bit was not being (re-)initialized properly and hence was causing problems. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Fuyun Liang Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index d1e4dcec5db2..9f8c56f9e9c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1598,6 +1598,7 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].rx.bd_base_info = 0; } static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) @@ -1605,6 +1606,7 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) ring->desc_cb[i].reuse_flag = 0; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); + ring->desc[i].rx.bd_base_info = 0; } static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes, -- GitLab From b7ffc03574f84c616d64e00b4ec3c4c46743b95f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 18 May 2018 17:07:48 -0400 Subject: [PATCH 0438/1291] media: tw686x: Fix incorrect vb2_mem_ops GFP flags [ Upstream commit 636757ab6c93e19e2f58d3b3af1312e34eaffbab ] When the driver is configured in the "memcpy" dma-mode, it uses vb2_vmalloc_memops, which is backed by a SLAB allocator and so shouldn't be using GFP_DMA32. Fix it. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/tw686x/tw686x-video.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c index c3fafa97b2d0..0ea8dd44026c 100644 --- a/drivers/media/pci/tw686x/tw686x-video.c +++ b/drivers/media/pci/tw686x/tw686x-video.c @@ -1228,7 +1228,8 @@ int tw686x_video_init(struct tw686x_dev *dev) vc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; vc->vidq.min_buffers_needed = 2; vc->vidq.lock = &vc->vb_mutex; - vc->vidq.gfp_flags = GFP_DMA32; + vc->vidq.gfp_flags = dev->dma_mode != TW686X_DMA_MODE_MEMCPY ? + GFP_DMA32 : 0; vc->vidq.dev = &dev->pci_dev->dev; err = vb2_queue_init(&vc->vidq); -- GitLab From 9aecdc961e6a863866dcbc389b4d5324937a73ad Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 21 May 2018 08:43:02 -0400 Subject: [PATCH 0439/1291] media: videobuf2-core: don't call memop 'finish' when queueing [ Upstream commit 90b2da89a083e1395cb322521a42397c49ae4500 ] When a buffer is queued or requeued in vb2_buffer_done, then don't call the finish memop. In this case the buffer is only returned to vb2, not to userspace. Calling 'finish' here will cause an unbalance when the queue is canceled, since the core will call the same memop again. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index ffbb178c6918..2dbf632c10de 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -912,9 +912,12 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) dprintk(4, "done processing on buffer %d, state: %d\n", vb->index, state); - /* sync buffers */ - for (plane = 0; plane < vb->num_planes; ++plane) - call_void_memop(vb, finish, vb->planes[plane].mem_priv); + if (state != VB2_BUF_STATE_QUEUED && + state != VB2_BUF_STATE_REQUEUEING) { + /* sync buffers */ + for (plane = 0; plane < vb->num_planes; ++plane) + call_void_memop(vb, finish, vb->planes[plane].mem_priv); + } spin_lock_irqsave(&q->done_lock, flags); if (state == VB2_BUF_STATE_QUEUED || -- GitLab From ef61d940cd9684ebb910b12d4d0936c2ffc1f811 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 May 2018 13:13:35 -0700 Subject: [PATCH 0440/1291] Btrfs: don't return ino to ino cache if inode item removal fails [ Upstream commit c08db7d8d295a4f3a10faaca376de011afff7950 ] In btrfs_evict_inode(), if btrfs_truncate_inode_items() fails, the inode item will still be in the tree but we still return the ino to the ino cache. That will blow up later when someone tries to allocate that ino, so don't return it to the cache. Fixes: 581bb050941b ("Btrfs: Cache free inode numbers in memory") Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3d339b41f5b3..e61044d14cd7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5499,13 +5499,18 @@ void btrfs_evict_inode(struct inode *inode) trans->block_rsv = rsv; ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - if (ret != -ENOSPC && ret != -EAGAIN) + if (ret) { + trans->block_rsv = &fs_info->trans_block_rsv; + btrfs_end_transaction(trans); + btrfs_btree_balance_dirty(fs_info); + if (ret != -ENOSPC && ret != -EAGAIN) { + btrfs_orphan_del(NULL, BTRFS_I(inode)); + btrfs_free_block_rsv(fs_info, rsv); + goto no_delete; + } + } else { break; - - trans->block_rsv = &fs_info->trans_block_rsv; - btrfs_end_transaction(trans); - trans = NULL; - btrfs_btree_balance_dirty(fs_info); + } } btrfs_free_block_rsv(fs_info, rsv); @@ -5514,12 +5519,8 @@ void btrfs_evict_inode(struct inode *inode) * Errors here aren't a big deal, it just means we leave orphan items * in the tree. They will be cleaned up on the next mount. */ - if (ret == 0) { - trans->block_rsv = root->orphan_block_rsv; - btrfs_orphan_del(trans, BTRFS_I(inode)); - } else { - btrfs_orphan_del(NULL, BTRFS_I(inode)); - } + trans->block_rsv = root->orphan_block_rsv; + btrfs_orphan_del(trans, BTRFS_I(inode)); trans->block_rsv = &fs_info->trans_block_rsv; if (!(root == fs_info->tree_root || -- GitLab From 3bf165384e82f70571ba2f439391dab86eddb23d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 May 2018 13:13:31 -0700 Subject: [PATCH 0441/1291] Btrfs: don't BUG_ON() in btrfs_truncate_inode_items() [ Upstream commit 0552210997badb6a60740a26ff9d976a416510f0 ] btrfs_free_extent() can fail because of ENOMEM. There's no reason to panic here, we can just abort the transaction. Fixes: f4b9aa8d3b87 ("btrfs_truncate") Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e61044d14cd7..28a58f40f3a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4740,7 +4740,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, extent_num_bytes, 0, btrfs_header_owner(leaf), ino, extent_offset); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, ret); + break; + } if (btrfs_should_throttle_delayed_refs(trans, fs_info)) btrfs_async_run_delayed_refs(fs_info, trans->delayed_ref_updates * 2, -- GitLab From 31371d2dad493ce0bc7892cb6c1c3f5f397e12b9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Apr 2018 14:53:56 +0200 Subject: [PATCH 0442/1291] btrfs: add barriers to btrfs_sync_log before log_commit_wait wakeups [ Upstream commit 3d3a2e610ea5e7c6d4f9481ecce5d8e2d8317843 ] Currently the code assumes that there's an implied barrier by the sequence of code preceding the wakeup, namely the mutex unlock. As Nikolay pointed out: I think this is wrong (not your code) but the original assumption that the RELEASE semantics provided by mutex_unlock is sufficient. According to memory-barriers.txt: Section 'LOCK ACQUISITION FUNCTIONS' states: (2) RELEASE operation implication: Memory operations issued before the RELEASE will be completed before the RELEASE operation has completed. Memory operations issued after the RELEASE *may* be completed before the RELEASE operation has completed. (I've bolded the may portion) The example given there: As an example, consider the following: *A = a; *B = b; ACQUIRE *C = c; *D = d; RELEASE *E = e; *F = f; The following sequence of events is acceptable: ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE So if we assume that *C is modifying the flag which the waitqueue is checking, and *E is the actual wakeup, then those accesses can be re-ordered... IMHO this code should be considered broken... Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index bf4e22df7c97..e1b4a59485df 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3041,8 +3041,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); /* - * The barrier before waitqueue_active is implied by mutex_unlock + * The barrier before waitqueue_active is needed so all the updates + * above are seen by the woken threads. It might not be necessary, but + * proving that seems to be hard. */ + smp_mb(); if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) wake_up(&log_root_tree->log_commit_wait[index2]); out: @@ -3053,8 +3056,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&root->log_mutex); /* - * The barrier before waitqueue_active is implied by mutex_unlock + * The barrier before waitqueue_active is needed so all the updates + * above are seen by the woken threads. It might not be necessary, but + * proving that seems to be hard. */ + smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); return ret; -- GitLab From 2737a4adec5a8903e7fa2ddd8368dff1ad658fe7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 14 May 2018 09:38:13 +0800 Subject: [PATCH 0443/1291] btrfs: qgroup: Finish rescan when hit the last leaf of extent tree [ Upstream commit ff3d27a048d926b3920ccdb75d98788c567cae0d ] Under the following case, qgroup rescan can double account cowed tree blocks: In this case, extent tree only has one tree block. - | transid=5 last committed=4 | btrfs_qgroup_rescan_worker() | |- btrfs_start_transaction() | | transid = 5 | |- qgroup_rescan_leaf() | |- btrfs_search_slot_for_read() on extent tree | Get the only extent tree block from commit root (transid = 4). | Scan it, set qgroup_rescan_progress to the last | EXTENT/META_ITEM + 1 | now qgroup_rescan_progress = A + 1. | | fs tree get CoWed, new tree block is at A + 16K | transid 5 get committed - | transid=6 last committed=5 | btrfs_qgroup_rescan_worker() | btrfs_qgroup_rescan_worker() | |- btrfs_start_transaction() | | transid = 5 | |- qgroup_rescan_leaf() | |- btrfs_search_slot_for_read() on extent tree | Get the only extent tree block from commit root (transid = 5). | scan it using qgroup_rescan_progress (A + 1). | found new tree block beyong A, and it's fs tree block, | account it to increase qgroup numbers. - In above case, tree block A, and tree block A + 16K get accounted twice, while qgroup rescan should stop when it already reach the last leaf, other than continue using its qgroup_rescan_progress. Such case could happen by just looping btrfs/017 and with some possibility it can hit such double qgroup accounting problem. Fix it by checking the path to determine if we should finish qgroup rescan, other than relying on next loop to exit. Reported-by: Nikolay Borisov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e172d4843eae..473ad5985aa3 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2499,6 +2499,21 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->qgroup_lock); } +/* + * Check if the leaf is the last leaf. Which means all node pointers + * are at their last position. + */ +static bool is_last_leaf(struct btrfs_path *path) +{ + int i; + + for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { + if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1) + return false; + } + return true; +} + /* * returns < 0 on error, 0 when more leafs are to be scanned. * returns 1 when done. @@ -2512,6 +2527,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct ulist *roots = NULL; struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); u64 num_bytes; + bool done; int slot; int ret; @@ -2540,6 +2556,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, mutex_unlock(&fs_info->qgroup_rescan_lock); return ret; } + done = is_last_leaf(path); btrfs_item_key_to_cpu(path->nodes[0], &found, btrfs_header_nritems(path->nodes[0]) - 1); @@ -2586,6 +2603,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, } btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + if (done && !ret) + ret = 1; return ret; } -- GitLab From 652b94b72696e6cbe7332a461b535657de23fcbe Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 24 May 2018 10:44:20 -0500 Subject: [PATCH 0444/1291] x86/microcode: Make the late update update_lock a raw lock for RT [ Upstream commit ff987fcf011d20c53b3a613edf6e2055ea48e26e ] __reload_late() is called from stop_machine context and thus cannot acquire a non-raw spinlock on PREEMPT_RT. Signed-off-by: Scott Wood Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Thomas Gleixner Cc: Ashok Raj Cc: Clark Williams Cc: Pei Zhang Cc: x86-ml Link: http://lkml.kernel.org/r/20180524154420.24455-1-swood@redhat.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c8e0cda0f272..4fc0e08a30b9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -70,7 +70,7 @@ static DEFINE_MUTEX(microcode_mutex); /* * Serialize late loading so that CPUs get updated one-by-one. */ -static DEFINE_SPINLOCK(update_lock); +static DEFINE_RAW_SPINLOCK(update_lock); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; @@ -560,9 +560,9 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - spin_lock(&update_lock); + raw_spin_lock(&update_lock); apply_microcode_local(&err); - spin_unlock(&update_lock); + raw_spin_unlock(&update_lock); /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { -- GitLab From d93663a78783621895afd0999272c2ab8bad82cd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 25 May 2018 11:46:48 +0200 Subject: [PATCH 0445/1291] PM / wakeup: Make s2idle_lock a RAW_SPINLOCK [ Upstream commit 62fc00a6611a0014c85763f9def1fc07c15d1302 ] The `s2idle_lock' is acquired during suspend while interrupts are disabled even on RT. The lock is acquired for short sections only. Make it a RAW lock which avoids "sleeping while atomic" warnings on RT. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/power/suspend.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0685c4499431..c0bc2c89697a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -60,7 +60,7 @@ static const struct platform_s2idle_ops *s2idle_ops; static DECLARE_WAIT_QUEUE_HEAD(s2idle_wait_head); enum s2idle_states __read_mostly s2idle_state; -static DEFINE_SPINLOCK(s2idle_lock); +static DEFINE_RAW_SPINLOCK(s2idle_lock); void s2idle_set_ops(const struct platform_s2idle_ops *ops) { @@ -78,12 +78,12 @@ static void s2idle_enter(void) { trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); - spin_lock_irq(&s2idle_lock); + raw_spin_lock_irq(&s2idle_lock); if (pm_wakeup_pending()) goto out; s2idle_state = S2IDLE_STATE_ENTER; - spin_unlock_irq(&s2idle_lock); + raw_spin_unlock_irq(&s2idle_lock); get_online_cpus(); cpuidle_resume(); @@ -97,11 +97,11 @@ static void s2idle_enter(void) cpuidle_pause(); put_online_cpus(); - spin_lock_irq(&s2idle_lock); + raw_spin_lock_irq(&s2idle_lock); out: s2idle_state = S2IDLE_STATE_NONE; - spin_unlock_irq(&s2idle_lock); + raw_spin_unlock_irq(&s2idle_lock); trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, false); } @@ -156,12 +156,12 @@ void s2idle_wake(void) { unsigned long flags; - spin_lock_irqsave(&s2idle_lock, flags); + raw_spin_lock_irqsave(&s2idle_lock, flags); if (s2idle_state > S2IDLE_STATE_NONE) { s2idle_state = S2IDLE_STATE_WAKE; wake_up(&s2idle_wait_head); } - spin_unlock_irqrestore(&s2idle_lock, flags); + raw_spin_unlock_irqrestore(&s2idle_lock, flags); } EXPORT_SYMBOL_GPL(s2idle_wake); -- GitLab From 2040b0d8c9c0aa2e50b733c0ecd12d8605d8a66d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 May 2018 18:56:24 +0200 Subject: [PATCH 0446/1291] PCI: Prevent sysfs disable of device while driver is attached [ Upstream commit 6f5cdfa802733dcb561bf664cc89d203f2fd958f ] Manipulating the enable_cnt behind the back of the driver will wreak complete havoc with the kernel state, so disallow it. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Johannes Thumshirn Acked-by: Keith Busch Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 00fa4278c1f4..c3f0473d1afa 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -305,13 +305,16 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!val) { - if (pci_is_enabled(pdev)) - pci_disable_device(pdev); - else - result = -EIO; - } else + device_lock(dev); + if (dev->driver) + result = -EBUSY; + else if (val) result = pci_enable_device(pdev); + else if (pci_is_enabled(pdev)) + pci_disable_device(pdev); + else + result = -EIO; + device_unlock(dev); return result < 0 ? result : count; } -- GitLab From 4bb1acf80c7fb05994955a26d7e40594ff6e9a9a Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 24 May 2018 09:27:38 +0800 Subject: [PATCH 0447/1291] nvme-rdma: stop admin queue before freeing it [ Upstream commit 2e050f00a0f0e07467050cb4afae0234941e5bf3 ] For any failure after nvme_rdma_start_queue in nvme_rdma_configure_admin_queue, the admin queue will be freed with the NVME_RDMA_Q_LIVE flag still set. Once nvme_rdma_stop_queue is invoked, that will cause a use-after-free. BUG: KASAN: use-after-free in rdma_disconnect+0x1f/0xe0 [rdma_cm] To fix it, call nvme_rdma_stop_queue for all the failed cases after nvme_rdma_start_queue. Signed-off-by: Jianchao Wang Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 93a082e0bdd4..48a831d58e7a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -796,7 +796,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; + goto out_stop_queue; } ctrl->ctrl.sqsize = @@ -804,23 +804,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) - goto out_cleanup_queue; + goto out_stop_queue; ctrl->ctrl.max_hw_sectors = (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_stop_queue; error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (error) - goto out_cleanup_queue; + goto out_stop_queue; return 0; +out_stop_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); -- GitLab From 1afb8720b6943afe538b775fe3acbf367b772f4b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 May 2018 16:16:04 -0600 Subject: [PATCH 0448/1291] nvme-pci: Fix AER reset handling [ Upstream commit 72cd4cc28e234ed7189ee508ed65ab60c80a97c8 ] The nvme timeout handling doesn't do anything if the pci channel is offline, which is the case when recovering from PCI error event, so it was a bad idea to sync the controller reset in this state. This patch flushes the reset work in the error_resume callback instead when the channel is back to online. This keeps AER handling serialized and can recover from timeouts. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199757 Fixes: cc1d5e749a2e ("nvme/pci: Sync controller reset for AER slot_reset") Reported-by: Alex Gagniuc Tested-by: Alex Gagniuc Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4cac4755abef..6a76e3974240 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2519,6 +2519,9 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) static void nvme_error_resume(struct pci_dev *pdev) { + struct nvme_dev *dev = pci_get_drvdata(pdev); + + flush_work(&dev->ctrl.reset_work); pci_cleanup_aer_uncorrect_error_status(pdev); } -- GitLab From be1c9763cbc749d1d646d0797ff07da24d4bdb46 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:30 +0300 Subject: [PATCH 0449/1291] ath: Add regulatory mapping for FCC3_ETSIC [ Upstream commit 01fb2994a98dc72c8818c274f7b5983d5dd885c7 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index bdd2b4d61f2f..7d955fa8c24c 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -35,6 +35,7 @@ enum EnumRd { FRANCE_RES = 0x31, FCC3_FCCA = 0x3A, FCC3_WORLD = 0x3B, + FCC3_ETSIC = 0x3F, ETSI1_WORLD = 0x37, ETSI3_ETSIA = 0x32, @@ -168,6 +169,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {FCC2_ETSIC, CTL_FCC, CTL_ETSI}, {FCC3_FCCA, CTL_FCC, CTL_FCC}, {FCC3_WORLD, CTL_FCC, CTL_ETSI}, + {FCC3_ETSIC, CTL_FCC, CTL_ETSI}, {FCC4_FCCA, CTL_FCC, CTL_FCC}, {FCC5_FCCA, CTL_FCC, CTL_FCC}, {FCC6_FCCA, CTL_FCC, CTL_FCC}, -- GitLab From f198926d6a408d66d01ee59bf3db48361423becb Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:18 +0300 Subject: [PATCH 0450/1291] ath: Add regulatory mapping for ETSI8_WORLD [ Upstream commit 45faf6e096da8bb80e1ddf8c08a26a9601d9469e ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 7d955fa8c24c..7c0fcbbf1900 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -45,6 +45,7 @@ enum EnumRd { ETSI4_ETSIC = 0x38, ETSI5_WORLD = 0x39, ETSI6_WORLD = 0x34, + ETSI8_WORLD = 0x3D, ETSI_RESERVED = 0x33, MKK1_MKKA = 0x40, @@ -181,6 +182,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {ETSI4_WORLD, CTL_ETSI, CTL_ETSI}, {ETSI5_WORLD, CTL_ETSI, CTL_ETSI}, {ETSI6_WORLD, CTL_ETSI, CTL_ETSI}, + {ETSI8_WORLD, CTL_ETSI, CTL_ETSI}, /* XXX: For ETSI3_ETSIA, Was NO_CTL meant for the 2 GHz band ? */ {ETSI3_ETSIA, CTL_ETSI, CTL_ETSI}, -- GitLab From cdd106451c6a8b7c56b2284ac51aac4d23e5854b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:14 +0300 Subject: [PATCH 0451/1291] ath: Add regulatory mapping for APL13_WORLD [ Upstream commit 9ba8df0c52b3e6baa436374b429d3d73bd09a320 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 7c0fcbbf1900..2c873840a46a 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -69,6 +69,7 @@ enum EnumRd { APL1_ETSIC = 0x55, APL2_ETSIC = 0x56, APL5_WORLD = 0x58, + APL13_WORLD = 0x5A, APL6_WORLD = 0x5B, APL7_FCCA = 0x5C, APL8_WORLD = 0x5D, @@ -195,6 +196,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {APL3_WORLD, CTL_FCC, CTL_ETSI}, {APL4_WORLD, CTL_FCC, CTL_ETSI}, {APL5_WORLD, CTL_FCC, CTL_ETSI}, + {APL13_WORLD, CTL_ETSI, CTL_ETSI}, {APL6_WORLD, CTL_ETSI, CTL_ETSI}, {APL8_WORLD, CTL_ETSI, CTL_ETSI}, {APL9_WORLD, CTL_ETSI, CTL_ETSI}, -- GitLab From 3c51226005d940f901689ae76a01d73ae29f8e52 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:05 +0300 Subject: [PATCH 0452/1291] ath: Add regulatory mapping for APL2_FCCA [ Upstream commit 4f183687e3fad3ce0e06e38976cad81bc4541990 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: FCC * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 2c873840a46a..d8a7db4976f0 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -61,6 +61,7 @@ enum EnumRd { MKK1_MKKA1 = 0x4A, MKK1_MKKA2 = 0x4B, MKK1_MKKC = 0x4C, + APL2_FCCA = 0x4D, APL3_FCCA = 0x50, APL1_WORLD = 0x52, @@ -193,6 +194,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {FCC1_FCCA, CTL_FCC, CTL_FCC}, {APL1_WORLD, CTL_FCC, CTL_ETSI}, {APL2_WORLD, CTL_FCC, CTL_ETSI}, + {APL2_FCCA, CTL_FCC, CTL_FCC}, {APL3_WORLD, CTL_FCC, CTL_ETSI}, {APL4_WORLD, CTL_FCC, CTL_ETSI}, {APL5_WORLD, CTL_FCC, CTL_ETSI}, -- GitLab From 5e895c9cb8ca4c9b0c99cc48a3673e13a5020537 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:54 +0300 Subject: [PATCH 0453/1291] ath: Add regulatory mapping for Uganda [ Upstream commit 1ea3986ad2bc72081c69f3fbc1e5e0eeb3c44f17 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 5d80be213fac..3555f5170cc6 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -175,6 +175,7 @@ enum CountryCode { CTRY_TUNISIA = 788, CTRY_TURKEY = 792, CTRY_UAE = 784, + CTRY_UGANDA = 800, CTRY_UKRAINE = 804, CTRY_UNITED_KINGDOM = 826, CTRY_UNITED_STATES = 840, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index d8a7db4976f0..cba1020bc854 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -467,6 +467,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"}, {CTRY_TUNISIA, ETSI3_WORLD, "TN"}, {CTRY_TURKEY, ETSI3_WORLD, "TR"}, + {CTRY_UGANDA, FCC3_WORLD, "UG"}, {CTRY_UKRAINE, NULL1_WORLD, "UA"}, {CTRY_UAE, NULL1_WORLD, "AE"}, {CTRY_UNITED_KINGDOM, ETSI1_WORLD, "GB"}, -- GitLab From aafbcce424ecba10419a6303a67a184358f577e0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:48 +0300 Subject: [PATCH 0454/1291] ath: Add regulatory mapping for Tanzania [ Upstream commit 667ddac5745fb9fddfe8f7fd2523070f50bd4442 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 3555f5170cc6..74f745d9fab3 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -170,6 +170,7 @@ enum CountryCode { CTRY_SWITZERLAND = 756, CTRY_SYRIA = 760, CTRY_TAIWAN = 158, + CTRY_TANZANIA = 834, CTRY_THAILAND = 764, CTRY_TRINIDAD_Y_TOBAGO = 780, CTRY_TUNISIA = 788, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index cba1020bc854..b85dc86cc188 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -463,6 +463,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_SWITZERLAND, ETSI1_WORLD, "CH"}, {CTRY_SYRIA, NULL1_WORLD, "SY"}, {CTRY_TAIWAN, APL3_FCCA, "TW"}, + {CTRY_TANZANIA, APL1_WORLD, "TZ"}, {CTRY_THAILAND, FCC3_WORLD, "TH"}, {CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"}, {CTRY_TUNISIA, ETSI3_WORLD, "TN"}, -- GitLab From c4b93c91bb63b5e3de43df9015bf3a1b0ddf7a47 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:43 +0300 Subject: [PATCH 0455/1291] ath: Add regulatory mapping for Serbia [ Upstream commit 2a3169a54bb53717928392a04fb84deb765b51f1 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 74f745d9fab3..b5764a38f90c 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -159,6 +159,7 @@ enum CountryCode { CTRY_ROMANIA = 642, CTRY_RUSSIA = 643, CTRY_SAUDI_ARABIA = 682, + CTRY_SERBIA = 688, CTRY_SERBIA_MONTENEGRO = 891, CTRY_SINGAPORE = 702, CTRY_SLOVAKIA = 703, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index b85dc86cc188..1ced5a323cf8 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -452,6 +452,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_ROMANIA, NULL1_WORLD, "RO"}, {CTRY_RUSSIA, NULL1_WORLD, "RU"}, {CTRY_SAUDI_ARABIA, NULL1_WORLD, "SA"}, + {CTRY_SERBIA, ETSI1_WORLD, "RS"}, {CTRY_SERBIA_MONTENEGRO, ETSI1_WORLD, "CS"}, {CTRY_SINGAPORE, APL6_WORLD, "SG"}, {CTRY_SLOVAKIA, ETSI1_WORLD, "SK"}, -- GitLab From 936058197fbb3ecf62bfa8a5eb493bd34d6eff72 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:09:59 +0300 Subject: [PATCH 0456/1291] ath: Add regulatory mapping for Bermuda [ Upstream commit 9c790f2d234f65697e3b0948adbfdf36dbe63dd7 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: FCC * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index b5764a38f90c..ab6b006d1534 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -74,6 +74,7 @@ enum CountryCode { CTRY_BELARUS = 112, CTRY_BELGIUM = 56, CTRY_BELIZE = 84, + CTRY_BERMUDA = 60, CTRY_BOLIVIA = 68, CTRY_BOSNIA_HERZ = 70, CTRY_BRAZIL = 76, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 1ced5a323cf8..e13b96e45d53 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -313,6 +313,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_BELGIUM, ETSI1_WORLD, "BE"}, {CTRY_BELGIUM2, ETSI4_WORLD, "BL"}, {CTRY_BELIZE, APL1_ETSIC, "BZ"}, + {CTRY_BERMUDA, FCC3_FCCA, "BM"}, {CTRY_BOLIVIA, APL1_ETSIC, "BO"}, {CTRY_BOSNIA_HERZ, ETSI1_WORLD, "BA"}, {CTRY_BRAZIL, FCC3_WORLD, "BR"}, -- GitLab From 935674605cf8db94d85a504cb3c4e4587052aeb8 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:09:53 +0300 Subject: [PATCH 0457/1291] ath: Add regulatory mapping for Bahamas [ Upstream commit 699e2302c286a14afe7b7394151ce6c4e1790cc1 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index ab6b006d1534..869f276cc1d8 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -68,6 +68,7 @@ enum CountryCode { CTRY_AUSTRALIA = 36, CTRY_AUSTRIA = 40, CTRY_AZERBAIJAN = 31, + CTRY_BAHAMAS = 44, CTRY_BAHRAIN = 48, CTRY_BANGLADESH = 50, CTRY_BARBADOS = 52, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index e13b96e45d53..15bbd1e0d912 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -306,6 +306,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_AUSTRALIA2, FCC6_WORLD, "AU"}, {CTRY_AUSTRIA, ETSI1_WORLD, "AT"}, {CTRY_AZERBAIJAN, ETSI4_WORLD, "AZ"}, + {CTRY_BAHAMAS, FCC3_WORLD, "BS"}, {CTRY_BAHRAIN, APL6_WORLD, "BH"}, {CTRY_BANGLADESH, NULL1_WORLD, "BD"}, {CTRY_BARBADOS, FCC2_WORLD, "BB"}, -- GitLab From c0ff32b7e85238baf1172304be65a256e6adca83 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:20:03 +0100 Subject: [PATCH 0458/1291] powerpc/32: Add a missing include header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c89ca593220931c150cffda24b4d4ccf82f13fc8 ] The header file was missing from the includes. Fix the following warning, treated as error with W=1: arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/pci_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1d817f4d97d9..2094f2b249fd 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- GitLab From ee8c480702a62eb757effc81a6868407248363e3 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:19:56 +0100 Subject: [PATCH 0459/1291] powerpc/chrp/time: Make some functions static, add missing header include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b87a358b4a1421abd544c0b554b1b7159b2b36c0 ] Add a missing include . These functions can all be static, make it so. Fix warnings treated as errors with W=1: arch/powerpc/platforms/chrp/time.c:41:13: error: no previous prototype for ‘chrp_time_init’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:66:5: error: no previous prototype for ‘chrp_cmos_clock_read’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:74:6: error: no previous prototype for ‘chrp_cmos_clock_write’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:86:5: error: no previous prototype for ‘chrp_set_rtc_time’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:130:6: error: no previous prototype for ‘chrp_get_rtc_time’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/chrp/time.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index 03d115aaa191..acde7bbe0716 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -28,6 +28,8 @@ #include #include +#include + extern spinlock_t rtc_lock; #define NVRAM_AS0 0x74 @@ -63,7 +65,7 @@ long __init chrp_time_init(void) return 0; } -int chrp_cmos_clock_read(int addr) +static int chrp_cmos_clock_read(int addr) { if (nvram_as1 != 0) outb(addr>>8, nvram_as1); @@ -71,7 +73,7 @@ int chrp_cmos_clock_read(int addr) return (inb(nvram_data)); } -void chrp_cmos_clock_write(unsigned long val, int addr) +static void chrp_cmos_clock_write(unsigned long val, int addr) { if (nvram_as1 != 0) outb(addr>>8, nvram_as1); -- GitLab From d26f8af1a126c81b9b28ad7cf05fe946c845c428 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:13:05 +0200 Subject: [PATCH 0460/1291] powerpc/powermac: Add missing prototype for note_bootable_part() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f72cf3f1d49f2c35d6cb682af2e8c93550f264e4 ] Add a missing prototype for function `note_bootable_part` to silence a warning treated as error with W=1: arch/powerpc/platforms/powermac/setup.c:361:12: error: no previous prototype for ‘note_bootable_part’ [-Werror=missing-prototypes] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powermac/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index ab668cb72263..8b2eab1340f4 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -352,6 +352,7 @@ static int pmac_late_init(void) } machine_late_initcall(powermac, pmac_late_init); +void note_bootable_part(dev_t dev, int part, int goodness); /* * This is __ref because we check for "initializing" before * touching any of the __init sensitive things and "initializing" -- GitLab From 566be4969b839411244da8a80342e8da199d03d8 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:07:46 +0200 Subject: [PATCH 0461/1291] powerpc/powermac: Mark variable x as unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5a4b475cf8511da721f20ba432c244061db7139f ] Since the value of x is never intended to be read, declare it with gcc attribute as unused. Fix warning treated as error with W=1: arch/powerpc/platforms/powermac/bootx_init.c:471:21: error: variable ‘x’ set but not used [-Werror=unused-but-set-variable] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powermac/bootx_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index c3c9bbb3573a..ba0964c17620 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -468,7 +468,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4) boot_infos_t *bi = (boot_infos_t *) r4; unsigned long hdr; unsigned long space; - unsigned long ptr, x; + unsigned long ptr; char *model; unsigned long offset = reloc_offset(); @@ -562,6 +562,8 @@ void __init bootx_init(unsigned long r3, unsigned long r4) * MMU switched OFF, so this should not be useful anymore. */ if (bi->version < 4) { + unsigned long x __maybe_unused; + bootx_printf("Touching pages...\n"); /* -- GitLab From 7eda318729800c9ef64483da1b9fea194edbb134 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 6 Apr 2018 22:12:19 +0200 Subject: [PATCH 0462/1291] powerpc: Add __printf verification to prom_printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eae5f709a4d738c52b6ab636981755d76349ea9e ] __printf is useful to verify format and arguments. Fix arg mismatch reported by gcc, remove the following warnings (with W=1): arch/powerpc/kernel/prom_init.c:1467:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1471:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1504:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1505:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1506:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1507:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1508:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1509:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1975:39: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 2 has type ‘unsigned int’ arch/powerpc/kernel/prom_init.c:1986:27: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2567:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2567:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2569:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2569:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’ The patch also include arg mismatch fix for case with #define DEBUG_PROM (warning not listed here). This patch fix also the following warnings revealed by checkpatch: WARNING: Prefer using '"%s...", __func__' to using 'alloc_up', this function's name, in a string #101: FILE: arch/powerpc/kernel/prom_init.c:1235: + prom_debug("alloc_up(%lx, %lx)\n", size, align); and WARNING: Prefer using '"%s...", __func__' to using 'alloc_down', this function's name, in a string #138: FILE: arch/powerpc/kernel/prom_init.c:1278: + prom_debug("alloc_down(%lx, %lx, %s)\n", size, align, Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom_init.c | 114 ++++++++++++++++---------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 02190e90c7ae..f8782c7ef50f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -334,6 +334,7 @@ static void __init prom_print_dec(unsigned long val) call_prom("write", 3, 1, prom.stdout, buf+i, size); } +__printf(1, 2) static void __init prom_printf(const char *format, ...) { const char *p, *q, *s; @@ -1148,7 +1149,7 @@ static void __init prom_send_capabilities(void) */ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n", cores, NR_CPUS); ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); @@ -1230,7 +1231,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) if (align) base = _ALIGN_UP(base, align); - prom_debug("alloc_up(%x, %x)\n", size, align); + prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); @@ -1241,7 +1242,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) for(; (base + size) <= alloc_top; base = _ALIGN_UP(base + 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1253,12 +1254,12 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) return 0; alloc_bottom = addr + size; - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1273,7 +1274,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, { unsigned long base, addr = 0; - prom_debug("alloc_down(%x, %x, %s)\n", size, align, + prom_debug("%s(%lx, %lx, %s)\n", __func__, size, align, highmem ? "(high)" : "(low)"); if (ram_top == 0) prom_panic("alloc_down() called with mem not initialized\n"); @@ -1301,7 +1302,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, base = _ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; base = _ALIGN_DOWN(base - 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1312,12 +1313,12 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, alloc_top = addr; bail: - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1443,7 +1444,7 @@ static void __init prom_init_mem(void) if (size == 0) continue; - prom_debug(" %x %x\n", base, size); + prom_debug(" %lx %lx\n", base, size); if (base == 0 && (of_platform & PLATFORM_LPAR)) rmo_top = size; if ((base + size) > ram_top) @@ -1463,12 +1464,12 @@ static void __init prom_init_mem(void) if (prom_memory_limit) { if (prom_memory_limit <= alloc_bottom) { - prom_printf("Ignoring mem=%x <= alloc_bottom.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx <= alloc_bottom.\n", + prom_memory_limit); prom_memory_limit = 0; } else if (prom_memory_limit >= ram_top) { - prom_printf("Ignoring mem=%x >= ram_top.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx >= ram_top.\n", + prom_memory_limit); prom_memory_limit = 0; } else { ram_top = prom_memory_limit; @@ -1500,12 +1501,13 @@ static void __init prom_init_mem(void) alloc_bottom = PAGE_ALIGN(prom_initrd_end); prom_printf("memory layout at init:\n"); - prom_printf(" memory_limit : %x (16 MB aligned)\n", prom_memory_limit); - prom_printf(" alloc_bottom : %x\n", alloc_bottom); - prom_printf(" alloc_top : %x\n", alloc_top); - prom_printf(" alloc_top_hi : %x\n", alloc_top_high); - prom_printf(" rmo_top : %x\n", rmo_top); - prom_printf(" ram_top : %x\n", ram_top); + prom_printf(" memory_limit : %lx (16 MB aligned)\n", + prom_memory_limit); + prom_printf(" alloc_bottom : %lx\n", alloc_bottom); + prom_printf(" alloc_top : %lx\n", alloc_top); + prom_printf(" alloc_top_hi : %lx\n", alloc_top_high); + prom_printf(" rmo_top : %lx\n", rmo_top); + prom_printf(" ram_top : %lx\n", ram_top); } static void __init prom_close_stdin(void) @@ -1566,7 +1568,7 @@ static void __init prom_instantiate_opal(void) return; } - prom_printf("instantiating opal at 0x%x...", base); + prom_printf("instantiating opal at 0x%llx...", base); if (call_prom_ret("call-method", 4, 3, rets, ADDR("load-opal-runtime"), @@ -1582,10 +1584,10 @@ static void __init prom_instantiate_opal(void) reserve_mem(base, size); - prom_debug("opal base = 0x%x\n", base); - prom_debug("opal align = 0x%x\n", align); - prom_debug("opal entry = 0x%x\n", entry); - prom_debug("opal size = 0x%x\n", (long)size); + prom_debug("opal base = 0x%llx\n", base); + prom_debug("opal align = 0x%llx\n", align); + prom_debug("opal entry = 0x%llx\n", entry); + prom_debug("opal size = 0x%llx\n", size); prom_setprop(opal_node, "/ibm,opal", "opal-base-address", &base, sizeof(base)); @@ -1662,7 +1664,7 @@ static void __init prom_instantiate_rtas(void) prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); - prom_debug("rtas size = 0x%x\n", (long)size); + prom_debug("rtas size = 0x%x\n", size); prom_debug("prom_instantiate_rtas: end...\n"); } @@ -1720,7 +1722,7 @@ static void __init prom_instantiate_sml(void) if (base == 0) prom_panic("Could not allocate memory for sml\n"); - prom_printf("instantiating sml at 0x%x...", base); + prom_printf("instantiating sml at 0x%llx...", base); memset((void *)base, 0, size); @@ -1739,8 +1741,8 @@ static void __init prom_instantiate_sml(void) prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size", &size, sizeof(size)); - prom_debug("sml base = 0x%x\n", base); - prom_debug("sml size = 0x%x\n", (long)size); + prom_debug("sml base = 0x%llx\n", base); + prom_debug("sml size = 0x%x\n", size); prom_debug("prom_instantiate_sml: end...\n"); } @@ -1841,7 +1843,7 @@ static void __init prom_initialize_tce_table(void) prom_debug("TCE table: %s\n", path); prom_debug("\tnode = 0x%x\n", node); - prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tbase = 0x%llx\n", base); prom_debug("\tsize = 0x%x\n", minsize); /* Initialize the table to have a one-to-one mapping @@ -1928,12 +1930,12 @@ static void __init prom_hold_cpus(void) } prom_debug("prom_hold_cpus: start...\n"); - prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); - prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); - prom_debug(" 1) acknowledge = 0x%x\n", + prom_debug(" 1) spinloop = 0x%lx\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%lx\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%lx\n", (unsigned long)acknowledge); - prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); - prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + prom_debug(" 1) *acknowledge = 0x%lx\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%lx\n", secondary_hold); /* Set the common spinloop variable, so all of the secondary cpus * will block when they are awakened from their OF spinloop. @@ -1961,7 +1963,7 @@ static void __init prom_hold_cpus(void) prom_getprop(node, "reg", ®, sizeof(reg)); cpu_no = be32_to_cpu(reg); - prom_debug("cpu hw idx = %lu\n", cpu_no); + prom_debug("cpu hw idx = %u\n", cpu_no); /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1971,7 +1973,7 @@ static void __init prom_hold_cpus(void) if (cpu_no != prom.cpu) { /* Primary Thread of non-boot cpu or any thread */ - prom_printf("starting cpu hw idx %lu... ", cpu_no); + prom_printf("starting cpu hw idx %u... ", cpu_no); call_prom("start-cpu", 3, 0, node, secondary_hold, cpu_no); @@ -1982,11 +1984,11 @@ static void __init prom_hold_cpus(void) if (*acknowledge == cpu_no) prom_printf("done\n"); else - prom_printf("failed: %x\n", *acknowledge); + prom_printf("failed: %lx\n", *acknowledge); } #ifdef CONFIG_SMP else - prom_printf("boot cpu hw idx %lu\n", cpu_no); + prom_printf("boot cpu hw idx %u\n", cpu_no); #endif /* CONFIG_SMP */ } @@ -2264,7 +2266,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; - prom_debug("Chunk exhausted, claiming more at %x...\n", + prom_debug("Chunk exhausted, claiming more at %lx...\n", alloc_bottom); room = alloc_top - alloc_bottom; if (room > DEVTREE_CHUNK_SIZE) @@ -2490,7 +2492,7 @@ static void __init flatten_device_tree(void) room = alloc_top - alloc_bottom - 0x4000; if (room > DEVTREE_CHUNK_SIZE) room = DEVTREE_CHUNK_SIZE; - prom_debug("starting device tree allocs at %x\n", alloc_bottom); + prom_debug("starting device tree allocs at %lx\n", alloc_bottom); /* Now try to claim that */ mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); @@ -2553,7 +2555,7 @@ static void __init flatten_device_tree(void) int i; prom_printf("reserved memory map:\n"); for (i = 0; i < mem_reserve_cnt; i++) - prom_printf(" %x - %x\n", + prom_printf(" %llx - %llx\n", be64_to_cpu(mem_reserve_map[i].base), be64_to_cpu(mem_reserve_map[i].size)); } @@ -2563,9 +2565,9 @@ static void __init flatten_device_tree(void) */ mem_reserve_cnt = MEM_RESERVE_MAP_SIZE; - prom_printf("Device tree strings 0x%x -> 0x%x\n", + prom_printf("Device tree strings 0x%lx -> 0x%lx\n", dt_string_start, dt_string_end); - prom_printf("Device tree struct 0x%x -> 0x%x\n", + prom_printf("Device tree struct 0x%lx -> 0x%lx\n", dt_struct_start, dt_struct_end); } @@ -2997,7 +2999,7 @@ static void __init prom_find_boot_cpu(void) prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); - prom_debug("Booting CPU hw index = %lu\n", prom.cpu); + prom_debug("Booting CPU hw index = %d\n", prom.cpu); } static void __init prom_check_initrd(unsigned long r3, unsigned long r4) @@ -3019,8 +3021,8 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) reserve_mem(prom_initrd_start, prom_initrd_end - prom_initrd_start); - prom_debug("initrd_start=0x%x\n", prom_initrd_start); - prom_debug("initrd_end=0x%x\n", prom_initrd_end); + prom_debug("initrd_start=0x%lx\n", prom_initrd_start); + prom_debug("initrd_end=0x%lx\n", prom_initrd_end); } #endif /* CONFIG_BLK_DEV_INITRD */ } @@ -3273,7 +3275,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); - prom_debug("->dt_header_start=0x%x\n", hdr); + prom_debug("->dt_header_start=0x%lx\n", hdr); } #ifdef CONFIG_PPC32 -- GitLab From 56a58a5395aba19f0a4c3e18b46532511994841a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 May 2018 11:02:04 +0200 Subject: [PATCH 0463/1291] spi: sh-msiof: Fix setting SIRMDR1.SYNCAC to match SITMDR1.SYNCAC [ Upstream commit 0921e11e1e12802ae0a3c19cb02e33354ca51967 ] According to section 59.2.4 MSIOF Receive Mode Register 1 (SIRMDR1) in the R-Car Gen3 datasheet Rev.1.00, the value of the SIRMDR1.SYNCAC bit must match the value of the SITMDR1.SYNCAC bit. However, sh_msiof_spi_setup() changes only the latter. Fix this by updating the SIRMDR1 register like the SITMDR1 register, taking into account register bits that exist in SITMDR1 only. Reported-by: Renesas BSP team via Yoshihiro Shimoda Fixes: 7ff0b53c4051145d ("spi: sh-msiof: Avoid writing to registers from spi_master.setup()") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 52056535f54e..0fea18ab970e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -555,14 +555,16 @@ static int sh_msiof_spi_setup(struct spi_device *spi) /* Configure native chip select mode/polarity early */ clr = MDR1_SYNCMD_MASK; - set = MDR1_TRMD | TMDR1_PCON | MDR1_SYNCMD_SPI; + set = MDR1_SYNCMD_SPI; if (spi->mode & SPI_CS_HIGH) clr |= BIT(MDR1_SYNCAC_SHIFT); else set |= BIT(MDR1_SYNCAC_SHIFT); pm_runtime_get_sync(&p->pdev->dev); tmp = sh_msiof_read(p, TMDR1) & ~clr; - sh_msiof_write(p, TMDR1, tmp | set); + sh_msiof_write(p, TMDR1, tmp | set | MDR1_TRMD | TMDR1_PCON); + tmp = sh_msiof_read(p, RMDR1) & ~clr; + sh_msiof_write(p, RMDR1, tmp | set); pm_runtime_put(&p->pdev->dev); p->native_cs_high = spi->mode & SPI_CS_HIGH; p->native_cs_inited = true; -- GitLab From 210807e92b4902fa672c3ab6cb7381d374a58b23 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 24 May 2018 11:02:06 +0000 Subject: [PATCH 0464/1291] powerpc/8xx: fix invalid register expression in head_8xx.S [ Upstream commit e4ccb1dae6bdef228d729c076c38161ef6e7ca34 ] New binutils generate the following warning AS arch/powerpc/kernel/head_8xx.o arch/powerpc/kernel/head_8xx.S: Assembler messages: arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression This patch fixes it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4fee00d414e8..2d0d89e2cb9a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -958,7 +958,7 @@ start_here: tovirt(r6,r6) lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) -- GitLab From cf957ba7c6e8ab922ae45f46a31b5f8370bcf889 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 May 2018 21:07:12 +0200 Subject: [PATCH 0465/1291] pinctrl: at91-pio4: add missing of_node_put [ Upstream commit 21816364715f508c10da1e087e352bc1e326614f ] The device node iterators perform an of_node_get on each iteration, so a jump out of the loop requires an of_node_put. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; iterator name for_each_child_of_node; @@ for_each_child_of_node(root, child) { ... when != of_node_put(child) when != e = child + of_node_put(child); ? break; ... } ... when != child // Signed-off-by: Julia Lawall Acked-by: Ludovic Desroches Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91-pio4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index b1ca838dd80a..e61e2f8c91ce 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -576,8 +576,10 @@ static int atmel_pctl_dt_node_to_map(struct pinctrl_dev *pctldev, for_each_child_of_node(np_config, np) { ret = atmel_pctl_dt_subnode_to_map(pctldev, np, map, &reserved_maps, num_maps); - if (ret < 0) + if (ret < 0) { + of_node_put(np); break; + } } } -- GitLab From 39da0cf774db1f1ac41caa4d01a8c63903e5f85d Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:46 +0530 Subject: [PATCH 0466/1291] bpf: powerpc64: pad function address loads with NOPs [ Upstream commit 4ea69b2fd623dee2bbc77d3b6b7d8c0924e2026a ] For multi-function programs, loading the address of a callee function to a register requires emitting instructions whose count varies from one to five depending on the nature of the address. Since we come to know of the callee's address only before the extra pass, the number of instructions required to load this address may vary from what was previously generated. This can make the JITed image grow or shrink. To avoid this, we should generate a constant five-instruction when loading function addresses by padding the optimized load sequence with NOPs. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/net/bpf_jit_comp64.c | 34 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index bd0786c23109..254634fb3fc7 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -203,25 +203,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) { + unsigned int i, ctx_idx = ctx->idx; + + /* Load function address into r12 */ + PPC_LI64(12, func); + + /* For bpf-to-bpf function calls, the callee's address is unknown + * until the last extra pass. As seen above, we use PPC_LI64() to + * load the callee's address, but this may optimize the number of + * instructions required based on the nature of the address. + * + * Since we don't want the number of instructions emitted to change, + * we pad the optimized PPC_LI64() call with NOPs to guarantee that + * we always have a five-instruction sequence, which is the maximum + * that PPC_LI64() can emit. + */ + for (i = ctx->idx - ctx_idx; i < 5; i++) + PPC_NOP(); + #ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2) * and since we don't use a TOC ourself. */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); + PPC_BPF_LL(2, 12, 8); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(12, 12, 0); #endif + + PPC_MTLR(12); PPC_BLRL(); } -- GitLab From 082900a6d56766eeefd6f301c71f4d913d2cd0b9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 23 May 2018 17:19:22 -0500 Subject: [PATCH 0467/1291] PCI: pciehp: Request control of native hotplug only if supported [ Upstream commit 408fec36a1ab3d14273c2116b449ef1e9be3cb8b ] Currently we request control of native PCIe hotplug unconditionally. Native PCIe hotplug events are handled by the pciehp driver, and if it is not enabled those events will be lost. Request control of native PCIe hotplug only if the pciehp driver is enabled, so we will actually handle native PCIe hotplug events. Suggested-by: Bjorn Helgaas Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pci_root.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 6fc204a52493..eb857d6ea1fe 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -472,9 +472,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) } control = OSC_PCI_EXPRESS_CAPABILITY_CONTROL - | OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | OSC_PCI_EXPRESS_PME_CONTROL; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + control |= OSC_PCI_EXPRESS_NATIVE_HP_CONTROL; + if (pci_aer_available()) { if (aer_acpi_firmware_first()) dev_info(&device->dev, -- GitLab From a30ff89c34b8b6b952a8ceb9bf68027f19e28be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 23 May 2018 08:20:19 +0200 Subject: [PATCH 0468/1291] net: dsa: qca8k: Add support for QCA8334 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 64cf81675a1f64c1b311e4611dd3b6a961607612 ] Add support for the four-port variant of the Qualcomm QCA833x switch. Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 5ada7a41449c..2a2bbf5e9c8e 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -934,6 +934,7 @@ static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, qca8k_suspend, qca8k_resume); static const struct of_device_id qca8k_of_match[] = { + { .compatible = "qca,qca8334" }, { .compatible = "qca,qca8337" }, { /* sentinel */ }, }; -- GitLab From 945e6ce33d80cd637e6e9f24871c926581bac2c9 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 18 May 2018 15:38:54 +0800 Subject: [PATCH 0469/1291] mwifiex: correct histogram data with appropriate index [ Upstream commit 30bfce0b63fa68c14ae1613eb9d259fa18644074 ] Correct snr/nr/rssi data index to avoid possible buffer underflow. Signed-off-by: Xinming Hu Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 0cd68ffc2c74..51ccf10f4413 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -708,12 +708,14 @@ void mwifiex_hist_data_set(struct mwifiex_private *priv, u8 rx_rate, s8 snr, s8 nflr) { struct mwifiex_histogram_data *phist_data = priv->hist_data; + s8 nf = -nflr; + s8 rssi = snr - nflr; atomic_inc(&phist_data->num_samples); atomic_inc(&phist_data->rx_rate[rx_rate]); - atomic_inc(&phist_data->snr[snr]); - atomic_inc(&phist_data->noise_flr[128 + nflr]); - atomic_inc(&phist_data->sig_str[nflr - snr]); + atomic_inc(&phist_data->snr[snr + 128]); + atomic_inc(&phist_data->noise_flr[nf + 128]); + atomic_inc(&phist_data->sig_str[rssi + 128]); } /* function to reset histogram data during init/reset */ -- GitLab From cffbdb7702ba549f116c44fe3006a587c94f8885 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 27 Apr 2018 14:31:40 -0400 Subject: [PATCH 0470/1291] ima: based on policy verify firmware signatures (pre-allocated buffer) [ Upstream commit fd90bc559bfba743ae8de87ff23b92a5e4668062 ] Don't differentiate, for now, between kernel_read_file_id READING_FIRMWARE and READING_FIRMWARE_PREALLOC_BUFFER enumerations. Fixes: a098ecd firmware: support loading into a pre-allocated buffer (since 4.8) Signed-off-by: Mimi Zohar Cc: Luis R. Rodriguez Cc: David Howells Cc: Kees Cook Cc: Serge E. Hallyn Cc: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 7e334fd31c15..f8553179bdd7 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -379,6 +379,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id) static int read_idmap[READING_MAX_ID] = { [READING_FIRMWARE] = FIRMWARE_CHECK, + [READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK, [READING_MODULE] = MODULE_CHECK, [READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK, [READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK, -- GitLab From d0a963cf845c6ed34e911774b0de6bca7daa5e6f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 May 2018 18:19:49 +0100 Subject: [PATCH 0471/1291] drivers/perf: arm-ccn: don't log to dmesg in event_init [ Upstream commit 1898eb61fbc9703efee886d3abec27a388cf28c3 ] The ARM CCN PMU driver uses dev_warn() to complain about parameters in the user-provided perf_event_attr. This means that under normal operation (e.g. a single invocation of the perf tool), a number of messages warnings may be logged to dmesg. Tools may issue multiple syscalls to probe for feature support, and multiple applications (from multiple users) can attempt to open events simultaneously, so this is not very helpful, even if a user happens to have access to dmesg. Worse, this can push important information out of the dmesg ring buffer, and can significantly slow down syscall fuzzers, vastly increasing the time it takes to find critical bugs. Demote the dev_warn() instances to dev_dbg(), as is the case for all other PMU drivers under drivers/perf/. Users who wish to debug PMU event initialisation can enable dynamic debug to receive these messages. Signed-off-by: Mark Rutland Cc: Pawel Moll Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 72fd1750134d..942d076cbb0a 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -736,7 +736,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) ccn = pmu_to_arm_ccn(event->pmu); if (hw->sample_period) { - dev_warn(ccn->dev, "Sampling not supported!\n"); + dev_dbg(ccn->dev, "Sampling not supported!\n"); return -EOPNOTSUPP; } @@ -744,12 +744,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) event->attr.exclude_kernel || event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || event->attr.exclude_guest) { - dev_warn(ccn->dev, "Can't exclude execution levels!\n"); + dev_dbg(ccn->dev, "Can't exclude execution levels!\n"); return -EINVAL; } if (event->cpu < 0) { - dev_warn(ccn->dev, "Can't provide per-task data!\n"); + dev_dbg(ccn->dev, "Can't provide per-task data!\n"); return -EOPNOTSUPP; } /* @@ -771,13 +771,13 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) switch (type) { case CCN_TYPE_MN: if (node_xp != ccn->mn_id) { - dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid MN ID %d!\n", node_xp); return -EINVAL; } break; case CCN_TYPE_XP: if (node_xp >= ccn->num_xps) { - dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp); return -EINVAL; } break; @@ -785,11 +785,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) break; default: if (node_xp >= ccn->num_nodes) { - dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp); return -EINVAL; } if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) { - dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n", + dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n", type, node_xp); return -EINVAL; } @@ -808,19 +808,19 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) if (event_id != e->event) continue; if (e->num_ports && port >= e->num_ports) { - dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n", port, node_xp); return -EINVAL; } if (e->num_vcs && vc >= e->num_vcs) { - dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n", vc, node_xp); return -EINVAL; } valid = 1; } if (!valid) { - dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", event_id, node_xp); return -EINVAL; } -- GitLab From 625d1e783375202c099b67aecb9b227d5b6f568f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 18 May 2018 10:30:07 -0700 Subject: [PATCH 0472/1291] spi: Add missing pm_runtime_put_noidle() after failed get [ Upstream commit 7e48e23a1f4a50f93ac1073f1326e0a73829b631 ] If pm_runtime_get_sync() fails we should call pm_runtime_put_noidle(). This is probably not a critical fix as we should only hit this when things are broken elsewhere. Signed-off-by: Tony Lindgren Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 84dfef4bd6ae..f85d30dc9187 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1222,6 +1222,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) if (!was_busy && ctlr->auto_runtime_pm) { ret = pm_runtime_get_sync(ctlr->dev.parent); if (ret < 0) { + pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); mutex_unlock(&ctlr->io_mutex); -- GitLab From 1d6908ce90b5506cdd5f163afeecffa0039a62e6 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 19 May 2018 16:53:18 +0100 Subject: [PATCH 0473/1291] net: hns3: Fix the missing client list node initialization [ Upstream commit 13562d1f5e2fbe2cf33b23a00abca3f71264c4ac ] This patch fixes the missing initialization of the client list node in the hnae3_register_client() function. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Xi Wang Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 9f8c56f9e9c1..69726908e72c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2883,6 +2883,8 @@ static int __init hns3_init_module(void) client.ops = &client_ops; + INIT_LIST_HEAD(&client.node); + ret = hnae3_register_client(&client); if (ret) return ret; -- GitLab From 63c7e58dab1e6ba58e6c3c31097c58c8a1eaf0f5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 20 Apr 2018 16:30:02 -0700 Subject: [PATCH 0474/1291] fscrypt: use unbound workqueue for decryption [ Upstream commit 36dd26e0c8d42699eeba87431246c07c28075bae ] Improve fscrypt read performance by switching the decryption workqueue from bound to unbound. With the bound workqueue, when multiple bios completed on the same CPU, they were decrypted on that same CPU. But with the unbound queue, they are now decrypted in parallel on any CPU. Although fscrypt read performance can be tough to measure due to the many sources of variation, this change is most beneficial when decryption is slow, e.g. on CPUs without AES instructions. For example, I timed tarring up encrypted directories on f2fs. On x86 with AES-NI instructions disabled, the unbound workqueue improved performance by about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available. But with AES-NI enabled, performance was unchanged to within ~2%. I also did the same test on a quad-core ARM CPU using xts-speck128-neon encryption. There performance was usually about 10% better with the unbound workqueue, bringing it closer to the unencrypted speed. The unbound workqueue may be worse in some cases due to worse locality, but I think it's still the better default. dm-crypt uses an unbound workqueue by default too, so this change makes fscrypt match. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/crypto/crypto.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index d262a93d9b31..daf2683f0655 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -446,8 +446,17 @@ int fscrypt_initialize(unsigned int cop_flags) */ static int __init fscrypt_init(void) { + /* + * Use an unbound workqueue to allow bios to be decrypted in parallel + * even when they happen to complete on the same CPU. This sacrifices + * locality, but it's worthwhile since decryption is CPU-intensive. + * + * Also use a high-priority workqueue to prioritize decryption work, + * which blocks reads from completing, over regular application tasks. + */ fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue", - WQ_HIGHPRI, 0); + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); if (!fscrypt_read_workqueue) goto fail; -- GitLab From c9f744135ad7004cb978ec59c745c5e852bf7476 Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Thu, 3 May 2018 16:37:17 +0530 Subject: [PATCH 0475/1291] scsi: ufs: ufshcd: fix possible unclocked register access [ Upstream commit b334456ec2021b1addc19806990115e69ec4ac32 ] Vendor specific setup_clocks ops may depend on clocks managed by ufshcd driver so if the vendor specific setup_clocks callback is called when the required clocks are turned off, it results into unclocked register access. This change make sure that required clocks are enabled before vendor specific setup_clocks callback is called. Signed-off-by: Subhash Jadavani Signed-off-by: Venkat Gopalakrishnan Signed-off-by: Can Guo Signed-off-by: Asutosh Das Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 3bb1f6cc297a..5efcfb528cb8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6761,9 +6761,16 @@ static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on, if (list_empty(head)) goto out; - ret = ufshcd_vops_setup_clocks(hba, on, PRE_CHANGE); - if (ret) - return ret; + /* + * vendor specific setup_clocks ops may depend on clocks managed by + * this standard driver hence call the vendor specific setup_clocks + * before disabling the clocks managed here. + */ + if (!on) { + ret = ufshcd_vops_setup_clocks(hba, on, PRE_CHANGE); + if (ret) + return ret; + } list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk)) { @@ -6787,9 +6794,16 @@ static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on, } } - ret = ufshcd_vops_setup_clocks(hba, on, POST_CHANGE); - if (ret) - return ret; + /* + * vendor specific setup_clocks ops may depend on clocks managed by + * this standard driver hence call the vendor specific setup_clocks + * after enabling the clocks managed here. + */ + if (on) { + ret = ufshcd_vops_setup_clocks(hba, on, POST_CHANGE); + if (ret) + return ret; + } out: if (ret) { -- GitLab From b095b5888eab89b8e92418b2ebd93f6d535711b9 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 3 May 2018 16:37:16 +0530 Subject: [PATCH 0476/1291] scsi: ufs: fix exception event handling [ Upstream commit 2e3611e9546c2ed4def152a51dfd34e8dddae7a5 ] The device can set the exception event bit in one of the response UPIU, for example to notify the need for urgent BKOPs operation. In such a case, the host driver calls ufshcd_exception_event_handler to handle this notification. When trying to check the exception event status (for finding the cause for the exception event), the device may be busy with additional SCSI commands handling and may not respond within the 100ms timeout. To prevent that, we need to block SCSI commands during handling of exception events and allow retransmissions of the query requests, in case of timeout. Signed-off-by: Subhash Jadavani Signed-off-by: Maya Erez Signed-off-by: Can Guo Signed-off-by: Asutosh Das Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5efcfb528cb8..21c81c1feac5 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4947,6 +4947,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) hba = container_of(work, struct ufs_hba, eeh_work); pm_runtime_get_sync(hba->dev); + scsi_block_requests(hba->host); err = ufshcd_get_ee_status(hba, &status); if (err) { dev_err(hba->dev, "%s: failed to get exception status %d\n", @@ -4960,6 +4961,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) ufshcd_bkops_exception_event_handler(hba); out: + scsi_unblock_requests(hba->host); pm_runtime_put_sync(hba->dev); return; } -- GitLab From fdb5207dc1f1e3844728df5bb9d125d0c0cee5e7 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Thu, 17 May 2018 19:15:05 +0200 Subject: [PATCH 0477/1291] scsi: zfcp: assert that the ERP lock is held when tracing a recovery trigger [ Upstream commit 9e156c54ace310ce7fb1cd960e62416947f3d47c ] Otherwise iterating with list_for_each() over the adapter->erp_ready_head and adapter->erp_running_head lists can lead to an infinite loop. See commit "zfcp: fix infinite iteration on erp_ready_head list". The run-time check is only performed for debug kernels which have the kernel lock validator enabled. Following is an example of the warning that is reported, if the ERP lock is not held when calling zfcp_dbf_rec_trig(): WARNING: CPU: 0 PID: 604 at drivers/s390/scsi/zfcp_dbf.c:288 zfcp_dbf_rec_trig+0x172/0x188 Modules linked in: ... CPU: 0 PID: 604 Comm: kworker/u128:3 Not tainted 4.16.0-... #1 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) Workqueue: zfcp_q_0.0.1906 zfcp_scsi_rport_work Krnl PSW : 00000000330fdbf9 00000000367e9728 (zfcp_dbf_rec_trig+0x172/0x188) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 RI:0 EA:3 Krnl GPRS: 00000000c57a5d99 3288200000000000 0000000000000000 000000006cc82740 00000000009d09d6 0000000000000000 00000000000000ff 0000000000000000 0000000000000000 0000000000e1b5fe 000000006de01d38 0000000076130958 000000006cc82548 000000006de01a98 00000000009d09d6 000000006a6d3c80 Krnl Code: 00000000009d0ad2: eb7ff0b80004 lmg %r7,%r15,184(%r15) 00000000009d0ad8: c0f4000d7dd0 brcl 15,b80678 #00000000009d0ade: a7f40001 brc 15,9d0ae0 >00000000009d0ae2: a7f4ff7d brc 15,9d09dc 00000000009d0ae6: e340f0f00004 lg %r4,240(%r15) 00000000009d0aec: eb7ff0b80004 lmg %r7,%r15,184(%r15) 00000000009d0af2: 07f4 bcr 15,%r4 00000000009d0af4: 0707 bcr 0,%r7 Call Trace: ([<00000000009d09d6>] zfcp_dbf_rec_trig+0x66/0x188) [<00000000009dd740>] zfcp_scsi_rport_work+0x98/0x190 [<0000000000169b34>] process_one_work+0x3d4/0x6f8 [<000000000016a08a>] worker_thread+0x232/0x418 [<000000000017219e>] kthread+0x166/0x178 [<0000000000b815ea>] kernel_thread_starter+0x6/0xc [<0000000000b815e4>] kernel_thread_starter+0x0/0xc 2 locks held by kworker/u128:3/604: #0: ((wq_completion)name){+.+.}, at: [<0000000082af1024>] process_one_work+0x1dc/0x6f8 #1: ((work_completion)(&port->rport_work)){+.+.}, at: [<0000000082af1024>] process_one_work+0x1dc/0x6f8 Last Breaking-Event-Address: [<00000000009d0ade>] zfcp_dbf_rec_trig+0x16e/0x188 ---[ end trace b2f4020572e2c124 ]--- Suggested-by: Steffen Maier Signed-off-by: Jens Remus Reviewed-by: Benjamin Block Reviewed-by: Steffen Maier Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index b415ba42ca73..599447032e50 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -285,6 +285,8 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, struct list_head *entry; unsigned long flags; + lockdep_assert_held(&adapter->erp_lock); + if (unlikely(!debug_level_enabled(dbf->rec, level))) return; -- GitLab From 33775b07426be6553003a15743bd24a8bf0d841e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: [PATCH 0478/1291] drm/nouveau/fifo/gk104-: poll for runlist update completion [ Upstream commit 4f2fc25c0f8bcc8db1b8a7b21e88c3d7f35c5acb ] Newer HW doesn't appear to send this event, which will cause long delays in runlist updates if they don't complete immediately. RM doesn't use these events anywhere, and an NVGPU commit message notes that polling is the preferred method even on HW that supports the event. Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index a7e55c422501..0b632dc0cf7d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -155,10 +155,10 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) (target << 28)); nvkm_wr32(device, 0x002274, (runl << 20) | nr); - if (wait_event_timeout(fifo->runlist[runl].wait, - !(nvkm_rd32(device, 0x002284 + (runl * 0x08)) - & 0x00100000), - msecs_to_jiffies(2000)) == 0) + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000)) + break; + ) < 0) nvkm_error(subdev, "runlist %d update timeout\n", runl); unlock: mutex_unlock(&subdev->mutex); -- GitLab From 192591ade6ecaf9147b7fde977cda0bc77cc5a67 Mon Sep 17 00:00:00 2001 From: Vic Wei Date: Mon, 23 Apr 2018 15:17:07 -0700 Subject: [PATCH 0479/1291] Bluetooth: btusb: add ID for LiteOn 04ca:301a [ Upstream commit d666fc5479ad76a1bcbe6476d4997cea714bab2d ] Contains a QCA6174A chipset, with USB BT. Let's support loading firmware on it. >From usb-devices: T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=301a Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Vic Wei Signed-off-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d89a674604b8..819521d5895e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -279,6 +279,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x04ca, 0x3015), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x04ca, 0x301a), .driver_info = BTUSB_QCA_ROME }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, -- GitLab From aca6728fa1ede62972c266b30e618240ef8e9e6e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 17 May 2018 22:48:17 +0200 Subject: [PATCH 0480/1291] rtc: tps6586x: fix possible race condition [ Upstream commit 63d22063073b0ab46d1e06fe633fb5de8f5c58e1 ] The probe function is not allowed to fail after the RTC is registered because the following may happen: CPU0: CPU1: sys_load_module() do_init_module() do_one_initcall() cmos_do_probe() rtc_device_register() __register_chrdev() cdev->owner = struct module* open("/dev/rtc0") rtc_device_unregister() module_put() free_module() module_free(mod->module_core) /* struct module *module is now freed */ chrdev_open() spin_lock(cdev_lock) cdev_get() try_module_get() module_is_live() /* dereferences already freed struct module* */ Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-tps6586x.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index a3418a8a3796..97fdc99bfeef 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -276,14 +276,15 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); platform_set_drvdata(pdev, rtc); - rtc->rtc = devm_rtc_device_register(&pdev->dev, dev_name(&pdev->dev), - &tps6586x_rtc_ops, THIS_MODULE); + rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc->rtc)) { ret = PTR_ERR(rtc->rtc); - dev_err(&pdev->dev, "RTC device register: ret %d\n", ret); + dev_err(&pdev->dev, "RTC allocate device: ret %d\n", ret); goto fail_rtc_register; } + rtc->rtc->ops = &tps6586x_rtc_ops; + ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, tps6586x_rtc_irq, IRQF_ONESHOT, @@ -294,6 +295,13 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) goto fail_rtc_register; } disable_irq(rtc->irq); + + ret = rtc_register_device(rtc->rtc); + if (ret) { + dev_err(&pdev->dev, "RTC device register: ret %d\n", ret); + goto fail_rtc_register; + } + return 0; fail_rtc_register: -- GitLab From 8f5e7596fe3a41be914056bab79230b2c71d7e63 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 17 May 2018 22:47:05 +0200 Subject: [PATCH 0481/1291] rtc: vr41xx: fix possible race condition [ Upstream commit 9a99247c9c1d1c95c6e8153d013979aac6111c6e ] The probe function is not allowed to fail after the RTC is registered because the following may happen: CPU0: CPU1: sys_load_module() do_init_module() do_one_initcall() cmos_do_probe() rtc_device_register() __register_chrdev() cdev->owner = struct module* open("/dev/rtc0") rtc_device_unregister() module_put() free_module() module_free(mod->module_core) /* struct module *module is now freed */ chrdev_open() spin_lock(cdev_lock) cdev_get() try_module_get() module_is_live() /* dereferences already freed struct module* */ Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-vr41xx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index 7ce22967fd16..7ed010714f29 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -292,13 +292,14 @@ static int rtc_probe(struct platform_device *pdev) goto err_rtc1_iounmap; } - rtc = devm_rtc_device_register(&pdev->dev, rtc_name, &vr41xx_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc)) { retval = PTR_ERR(rtc); goto err_iounmap_all; } + rtc->ops = &vr41xx_rtc_ops; + rtc->max_user_freq = MAX_PERIODIC_RATE; spin_lock_irq(&rtc_lock); @@ -340,6 +341,10 @@ static int rtc_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Real Time Clock of NEC VR4100 series\n"); + retval = rtc_register_device(rtc); + if (retval) + goto err_iounmap_all; + return 0; err_iounmap_all: -- GitLab From 9f25b548732cfdcda2b0bd638497934b4a8eaaf6 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 17 May 2018 22:26:21 +0200 Subject: [PATCH 0482/1291] rtc: tps65910: fix possible race condition [ Upstream commit e6000a438e534ee0afd9e83b67f4e23a26dd1067 ] The IRQ is requested before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before requesting the IRQ. Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-tps65910.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index d0244d7979fc..a56b526db89a 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -380,6 +380,10 @@ static int tps65910_rtc_probe(struct platform_device *pdev) if (!tps_rtc) return -ENOMEM; + tps_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(tps_rtc->rtc)) + return PTR_ERR(tps_rtc->rtc); + /* Clear pending interrupts */ ret = regmap_read(tps65910->regmap, TPS65910_RTC_STATUS, &rtc_reg); if (ret < 0) @@ -421,10 +425,10 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->irq = irq; device_set_wakeup_capable(&pdev->dev, 1); - tps_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, - &tps65910_rtc_ops, THIS_MODULE); - if (IS_ERR(tps_rtc->rtc)) { - ret = PTR_ERR(tps_rtc->rtc); + tps_rtc->rtc->ops = &tps65910_rtc_ops; + + ret = rtc_register_device(tps_rtc->rtc); + if (ret) { dev_err(&pdev->dev, "RTC device register: err %d\n", ret); return ret; } -- GitLab From 9b56c4151ac250f8ee4f320c0ac63ffdce275304 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 May 2018 20:02:23 +0200 Subject: [PATCH 0483/1291] ALSA: emu10k1: Rate-limit error messages about page errors [ Upstream commit 11d42c81036324697d367600bfc16f6dd37636fd ] The error messages at sanity checks of memory pages tend to repeat too many times once when it hits, and without the rate limit, it may flood and become unreadable. Replace such messages with the *_ratelimited() variant. Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1093027 Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index 4f1f69be1865..8c778fa33031 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -237,13 +237,13 @@ search_empty(struct snd_emu10k1 *emu, int size) static int is_valid_page(struct snd_emu10k1 *emu, dma_addr_t addr) { if (addr & ~emu->dma_mask) { - dev_err(emu->card->dev, + dev_err_ratelimited(emu->card->dev, "max memory size is 0x%lx (addr = 0x%lx)!!\n", emu->dma_mask, (unsigned long)addr); return 0; } if (addr & (EMUPAGESIZE-1)) { - dev_err(emu->card->dev, "page is not aligned\n"); + dev_err_ratelimited(emu->card->dev, "page is not aligned\n"); return 0; } return 1; @@ -334,7 +334,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst else addr = snd_pcm_sgbuf_get_addr(substream, ofs); if (! is_valid_page(emu, addr)) { - dev_err(emu->card->dev, + dev_err_ratelimited(emu->card->dev, "emu: failure page = %d\n", idx); mutex_unlock(&hdr->block_mutex); return NULL; -- GitLab From 6192b115c58b725789cb333b4e043a6491c8d313 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Thu, 17 May 2018 15:27:22 +0800 Subject: [PATCH 0484/1291] regulator: pfuze100: add .is_enable() for pfuze100_swb_regulator_ops [ Upstream commit 0b01fd3d40fe6402e5fa3b491ef23109feb1aaa5 ] If is_enabled() is not defined, regulator core will assume this regulator is already enabled, then it can NOT be really enabled after disabled. Based on Li Jun's patch from the NXP kernel tree. Signed-off-by: Anson Huang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/pfuze100-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 63922a2167e5..659e516455be 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -158,6 +158,7 @@ static const struct regulator_ops pfuze100_sw_regulator_ops = { static const struct regulator_ops pfuze100_swb_regulator_ops = { .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .list_voltage = regulator_list_voltage_table, .map_voltage = regulator_map_voltage_ascend, .set_voltage_sel = regulator_set_voltage_sel_regmap, -- GitLab From 1b3433cfa222b65342a4f945b96bec991ed6fe05 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Wed, 2 May 2018 13:08:11 +0200 Subject: [PATCH 0485/1291] md/raid1: add error handling of read error from FailFast device [ Upstream commit b33d10624fdc15cdf1495f3f00481afccec76783 ] Current handle_read_error() function calls fix_read_error() only if md device is RW and rdev does not include FailFast flag. It does not handle a read error from a RW device including FailFast flag. I am not sure it is intended. But I found that write IO error sets rdev faulty. The md module should handle the read IO error and write IO error equally. So I think read IO error should set rdev faulty. Signed-off-by: Gioh Kim Reviewed-by: Jack Wang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 029ecba60727..78d830763704 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2462,6 +2462,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) fix_read_error(conf, r1_bio->read_disk, r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); + } else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) { + md_error(mddev, rdev); } else { r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED; } -- GitLab From 7627ecfc4902a724b8a9aac971e19bd05655ad85 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 4 May 2018 18:08:10 +0800 Subject: [PATCH 0486/1291] md: fix NULL dereference of mddev->pers in remove_and_add_spares() [ Upstream commit c42a0e2675721e1444f56e6132a07b7b1ec169ac ] We met NULL pointer BUG as follow: [ 151.760358] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 [ 151.761340] PGD 80000001011eb067 P4D 80000001011eb067 PUD 1011ea067 PMD 0 [ 151.762039] Oops: 0000 [#1] SMP PTI [ 151.762406] Modules linked in: [ 151.762723] CPU: 2 PID: 3561 Comm: mdadm-test Kdump: loaded Not tainted 4.17.0-rc1+ #238 [ 151.763542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 [ 151.764432] RIP: 0010:remove_and_add_spares.part.56+0x13c/0x3a0 [ 151.765061] RSP: 0018:ffffc90001d7fcd8 EFLAGS: 00010246 [ 151.765590] RAX: 0000000000000000 RBX: ffff88013601d600 RCX: 0000000000000000 [ 151.766306] RDX: 0000000000000000 RSI: ffff88013601d600 RDI: ffff880136187000 [ 151.767014] RBP: ffff880136187018 R08: 0000000000000003 R09: 0000000000000051 [ 151.767728] R10: ffffc90001d7fed8 R11: 0000000000000000 R12: ffff88013601d600 [ 151.768447] R13: ffff8801298b1300 R14: ffff880136187000 R15: 0000000000000000 [ 151.769160] FS: 00007f2624276700(0000) GS:ffff88013ae80000(0000) knlGS:0000000000000000 [ 151.769971] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 151.770554] CR2: 0000000000000060 CR3: 0000000111aac000 CR4: 00000000000006e0 [ 151.771272] Call Trace: [ 151.771542] md_ioctl+0x1df2/0x1e10 [ 151.771906] ? __switch_to+0x129/0x440 [ 151.772295] ? __schedule+0x244/0x850 [ 151.772672] blkdev_ioctl+0x4bd/0x970 [ 151.773048] block_ioctl+0x39/0x40 [ 151.773402] do_vfs_ioctl+0xa4/0x610 [ 151.773770] ? dput.part.23+0x87/0x100 [ 151.774151] ksys_ioctl+0x70/0x80 [ 151.774493] __x64_sys_ioctl+0x16/0x20 [ 151.774877] do_syscall_64+0x5b/0x180 [ 151.775258] entry_SYSCALL_64_after_hwframe+0x44/0xa9 For raid6, when two disk of the array are offline, two spare disks can be added into the array. Before spare disks recovery completing, system reboot and mdadm thinks it is ok to restart the degraded array by md_ioctl(). Since disks in raid6 is not only_parity(), raid5_run() will abort, when there is no PPL feature or not setting 'start_dirty_degraded' parameter. Therefore, mddev->pers is NULL. But, mddev->raid_disks has been set and it will not be cleared when raid5_run abort. md_ioctl() can execute cmd 'HOT_REMOVE_DISK' to remove a disk by mdadm, which will cause NULL pointer dereference in remove_and_add_spares() finally. Signed-off-by: Yufen Yu Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 11a67eac55b1..5599712d478e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6498,6 +6498,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev) char b[BDEVNAME_SIZE]; struct md_rdev *rdev; + if (!mddev->pers) + return -ENODEV; + rdev = find_rdev(mddev, dev); if (!rdev) return -ENXIO; -- GitLab From 41b16e6ec6a8c269a4d7b75bf36523662c402db4 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 14 May 2018 11:16:16 -0700 Subject: [PATCH 0487/1291] ixgbevf: fix MAC address changes through ixgbevf_set_mac() [ Upstream commit 6e7d0ba1e59b1a306761a731e67634c0f2efea2a ] Set hw->mac.perm_addr in ixgbevf_set_mac() in order to avoid losing the custom MAC on reset. This can happen in the following case: >ip link set $vf address $mac >ethtool -r $vf Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 90ecc4b06462..90be4385bf36 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3737,6 +3737,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) return -EPERM; ether_addr_copy(hw->mac.addr, addr->sa_data); + ether_addr_copy(hw->mac.perm_addr, addr->sa_data); ether_addr_copy(netdev->dev_addr, addr->sa_data); return 0; -- GitLab From 0bd08027bd197103aa1c42843a894dca7414b5d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 25 Apr 2018 11:04:21 -0400 Subject: [PATCH 0488/1291] media: smiapp: fix timeout checking in smiapp_read_nvm [ Upstream commit 7a2148dfda8001c983f0effd9afd8a7fa58e99c4 ] The current code decrements the timeout counter i and the end of each loop i is incremented, so the check for timeout will always be false and hence the timeout mechanism is just a dead code path. Potentially, if the RD_READY bit is not set, we could end up in an infinite loop. Fix this so the timeout starts from 1000 and decrements to zero, if at the end of the loop i is zero we have a timeout condition. Detected by CoverityScan, CID#1324008 ("Logically dead code") Fixes: ccfc97bdb5ae ("[media] smiapp: Add driver") Signed-off-by: Colin Ian King Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/smiapp/smiapp-core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index 700f433261d0..e4d7f2febf00 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -1001,7 +1001,7 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor, if (rval) goto out; - for (i = 0; i < 1000; i++) { + for (i = 1000; i > 0; i--) { rval = smiapp_read( sensor, SMIAPP_REG_U8_DATA_TRANSFER_IF_1_STATUS, &s); @@ -1012,11 +1012,10 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor, if (s & SMIAPP_DATA_TRANSFER_IF_1_STATUS_RD_READY) break; - if (--i == 0) { - rval = -ETIMEDOUT; - goto out; - } - + } + if (!i) { + rval = -ETIMEDOUT; + goto out; } for (i = 0; i < SMIAPP_NVM_PAGE_SIZE; i++) { -- GitLab From d239ee35ddb2a967d1b6003eef180cb544c95249 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 15 May 2018 18:37:25 -0500 Subject: [PATCH 0489/1291] net: ethernet: ti: cpsw-phy-sel: check bus_find_device() ret value [ Upstream commit c6213eb1aee308e67377fd1890d84f7284caf531 ] This fixes klockworks warnings: Pointer 'dev' returned from call to function 'bus_find_device' at line 179 may be NULL and will be dereferenced at line 181. cpsw-phy-sel.c:179: 'dev' is assigned the return value from function 'bus_find_device'. bus.c:342: 'bus_find_device' explicitly returns a NULL value. cpsw-phy-sel.c:181: 'dev' is dereferenced by passing argument 1 to function 'dev_get_drvdata'. device.h:1024: 'dev' is passed to function 'dev_get_drvdata'. device.h:1026: 'dev' is explicitly dereferenced. Signed-off-by: Grygorii Strashko [nsekhar@ti.com: add an error message, fix return path] Signed-off-by: Sekhar Nori Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 18013645e76c..0c1adad7415d 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -177,12 +177,18 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave) } dev = bus_find_device(&platform_bus_type, NULL, node, match); - of_node_put(node); + if (!dev) { + dev_err(dev, "unable to find platform device for %pOF\n", node); + goto out; + } + priv = dev_get_drvdata(dev); priv->cpsw_phy_sel(priv, phy_mode, slave); put_device(dev); +out: + of_node_put(node); } EXPORT_SYMBOL_GPL(cpsw_phy_sel); -- GitLab From 25436aa5c184b93e364c9c97d754f71725be1179 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 16 May 2018 20:07:18 +0200 Subject: [PATCH 0490/1291] ALSA: usb-audio: Apply rate limit to warning messages in URB complete callback [ Upstream commit 377a879d9832f4ba69bd6a1fc996bb4181b1e504 ] retire_capture_urb() may print warning messages when the given URB doesn't align, and this may flood the system log easily. Put the rate limit to the message for avoiding it. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1093485 Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 3cbfae6604f9..d8a46d46bcd2 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1311,7 +1311,7 @@ static void retire_capture_urb(struct snd_usb_substream *subs, if (bytes % (runtime->sample_bits >> 3) != 0) { int oldbytes = bytes; bytes = frames * stride; - dev_warn(&subs->dev->dev, + dev_warn_ratelimited(&subs->dev->dev, "Corrected urb data len. %d->%d\n", oldbytes, bytes); } -- GitLab From 62a87c8759f272e65f7f60f4f7d2d7b3671e1edc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Apr 2018 12:37:09 -0400 Subject: [PATCH 0491/1291] media: atomisp: ov2680: don't declare unused vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e5c0680fd2c44252868fe4062558925b5506b179 ] drivers/staging/media/atomisp/i2c/atomisp-ov2680.c: In function ‘__ov2680_set_exposure’: drivers/staging/media/atomisp/i2c/atomisp-ov2680.c:400:10: warning: variable ‘hts’ set but not used [-Wunused-but-set-variable] u16 vts,hts; ^~~ drivers/staging/media/atomisp/i2c/atomisp-ov2680.c: In function ‘ov2680_detect’: drivers/staging/media/atomisp/i2c/atomisp-ov2680.c:1164:5: warning: variable ‘revision’ set but not used [-Wunused-but-set-variable] u8 revision; ^~~~~~~~ Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/atomisp/i2c/ov2680.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/atomisp/i2c/ov2680.c b/drivers/staging/media/atomisp/i2c/ov2680.c index 51b7d61df0f5..179576224319 100644 --- a/drivers/staging/media/atomisp/i2c/ov2680.c +++ b/drivers/staging/media/atomisp/i2c/ov2680.c @@ -396,12 +396,11 @@ static long __ov2680_set_exposure(struct v4l2_subdev *sd, int coarse_itg, { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov2680_device *dev = to_ov2680_sensor(sd); - u16 vts,hts; + u16 vts; int ret,exp_val; dev_dbg(&client->dev, "+++++++__ov2680_set_exposure coarse_itg %d, gain %d, digitgain %d++\n",coarse_itg, gain, digitgain); - hts = ov2680_res[dev->fmt_idx].pixels_per_line; vts = ov2680_res[dev->fmt_idx].lines_per_frame; /* group hold */ @@ -1190,7 +1189,8 @@ static int ov2680_detect(struct i2c_client *client) OV2680_SC_CMMN_SUB_ID, &high); revision = (u8) high & 0x0f; - dev_info(&client->dev, "sensor_revision id = 0x%x\n", id); + dev_info(&client->dev, "sensor_revision id = 0x%x, rev= %d\n", + id, revision); return 0; } -- GitLab From 6a43d5a3ca6c03103e614ac7d075615ef6609731 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Apr 2018 13:56:32 +0100 Subject: [PATCH 0492/1291] arm64: cmpwait: Clear event register before arming exclusive monitor [ Upstream commit 1cfc63b5ae60fe7e01773f38132f98d8b13a99a0 ] When waiting for a cacheline to change state in cmpwait, we may immediately wake-up the first time around the outer loop if the event register was already set (for example, because of the event stream). Avoid these spurious wakeups by explicitly clearing the event register before loading the cacheline and setting the exclusive monitor. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cmpxchg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index ae852add053d..0f2e1ab5e166 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -229,7 +229,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ - " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " sevl\n" \ + " wfe\n" \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ -- GitLab From d85073283aa7f838b3add199cbccfaed0a943bb5 Mon Sep 17 00:00:00 2001 From: Terry Junge Date: Mon, 30 Apr 2018 13:32:46 -0700 Subject: [PATCH 0493/1291] HID: hid-plantronics: Re-resend Update to map button for PTT products [ Upstream commit 37e376df5f4993677c33968a0c19b0c5acbf1108 ] Add a mapping for Push-To-Talk joystick trigger button. Tested on ChromeBox/ChromeBook with various Plantronics devices. Signed-off-by: Terry Junge Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-plantronics.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c index febb21ee190e..584b10d3fc3d 100644 --- a/drivers/hid/hid-plantronics.c +++ b/drivers/hid/hid-plantronics.c @@ -2,7 +2,7 @@ * Plantronics USB HID Driver * * Copyright (c) 2014 JD Cole - * Copyright (c) 2015 Terry Junge + * Copyright (c) 2015-2018 Terry Junge */ /* @@ -48,6 +48,10 @@ static int plantronics_input_mapping(struct hid_device *hdev, unsigned short mapped_key; unsigned long plt_type = (unsigned long)hid_get_drvdata(hdev); + /* special case for PTT products */ + if (field->application == HID_GD_JOYSTICK) + goto defaulted; + /* handle volume up/down mapping */ /* non-standard types or multi-HID interfaces - plt_type is PID */ if (!(plt_type & HID_USAGE_PAGE)) { -- GitLab From 3ee32f73fc7258b5470286d8d05585b3b4d10486 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 23 Apr 2018 01:40:16 +0000 Subject: [PATCH 0494/1291] arm64: dts: renesas: salvator-common: use audio-graph-card for Sound [ Upstream commit 06a574c7ef69bd0bd26ed08e35967acb76622ab3 ] Current Sound is using simple-audio-card which can't support HDMI. To use HDMI sound, we need to use audio-graph-card. But, one note is that r8a7795 has 2 HDMI ports, but r8a7796 has 1. Because of this mismatch, supporting HDMI on salvator-common is impossible. Thus, this patch exchange sound card to audio-graph-card and keep supporting ak4613 as 1st sound node. r8a7795/r8a7796 salvator-x{s} need to add HDMI sound individually. Signed-off-by: Kuninori Morimoto Tested-by: Nguyen Viet Dung Signed-off-by: Simon Horman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../boot/dts/renesas/salvator-common.dtsi | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index 9eb11a8d9eda..26a978616071 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -93,20 +93,12 @@ reg_12v: regulator2 { regulator-always-on; }; - rsnd_ak4613: sound { - compatible = "simple-audio-card"; + sound_card: sound { + compatible = "audio-graph-card"; - simple-audio-card,format = "left_j"; - simple-audio-card,bitclock-master = <&sndcpu>; - simple-audio-card,frame-master = <&sndcpu>; + label = "rcar-sound"; - sndcpu: simple-audio-card,cpu { - sound-dai = <&rcar_sound>; - }; - - sndcodec: simple-audio-card,codec { - sound-dai = <&ak4613>; - }; + dais = <&rsnd_port0>; }; vbus0_usb2: regulator-vbus0-usb2 { @@ -320,6 +312,12 @@ ak4613: codec@10 { asahi-kasei,out4-single-end; asahi-kasei,out5-single-end; asahi-kasei,out6-single-end; + + port { + ak4613_endpoint: endpoint { + remote-endpoint = <&rsnd_endpoint0>; + }; + }; }; cs2000: clk_multiplier@4f { @@ -538,10 +536,18 @@ &rcar_sound { <&audio_clk_c>, <&cpg CPG_CORE CPG_AUDIO_CLK_I>; - rcar_sound,dai { - dai0 { - playback = <&ssi0 &src0 &dvc0>; - capture = <&ssi1 &src1 &dvc1>; + ports { + rsnd_port0: port@0 { + rsnd_endpoint0: endpoint { + remote-endpoint = <&ak4613_endpoint>; + + dai-format = "left_j"; + bitclock-master = <&rsnd_endpoint0>; + frame-master = <&rsnd_endpoint0>; + + playback = <&ssi0 &src0 &dvc0>; + capture = <&ssi1 &src1 &dvc1>; + }; }; }; }; -- GitLab From 14bedc05ab2d18bd17014615f0897545337eabf1 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:15:13 +0200 Subject: [PATCH 0495/1291] drm/radeon: fix mode_valid's return type [ Upstream commit 7a47f20eb1fb8fa8d7a8fe3a4fd8c721f04c2174 ] The method struct drm_connector_helper_funcs::mode_valid is defined as returning an 'enum drm_mode_status' but the driver implementation for this method uses an 'int' for it. Fix this by using 'enum drm_mode_status' in the driver too. Signed-off-by: Luc Van Oostenryck Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_connectors.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 424cd1b66575..337d3a1c2a40 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -853,7 +853,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector) return ret; } -static int radeon_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -1013,7 +1013,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector) return ret; } -static int radeon_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1157,7 +1157,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector) return 1; } -static int radeon_tv_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if ((mode->hdisplay > 1024) || (mode->vdisplay > 768)) @@ -1499,7 +1499,7 @@ static void radeon_dvi_force(struct drm_connector *connector) radeon_connector->use_digital = true; } -static int radeon_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1801,7 +1801,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force) return ret; } -static int radeon_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; -- GitLab From b42848b2a819ae2fd310080bfb32221fde9e084d Mon Sep 17 00:00:00 2001 From: Samuel Li Date: Wed, 18 Apr 2018 16:26:18 -0400 Subject: [PATCH 0496/1291] drm/amdgpu: Remove VRAM from shared bo domains. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b3f217faf48603c91d4ca44a18e6ff74c3c1c0c ] This fixes an issue introduced by change "allow framebuffer in GART memory as well" which could lead to a shared buffer ending up pinned in vram. Use GTT if it is included in the domain, otherwise return an error. Signed-off-by: Samuel Li Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1360a24d2ede..f08624f2f209 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -683,8 +683,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) - return -EINVAL; + if (bo->prime_shared_count) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + domain = AMDGPU_GEM_DOMAIN_GTT; + else + return -EINVAL; + } if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; -- GitLab From 83eef34c65f179af463924e34fc687426ac2f705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Thu, 10 May 2018 23:59:19 +0200 Subject: [PATCH 0497/1291] powerpc/embedded6xx/hlwd-pic: Prevent interrupts from being handled by Starlet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9dcb3df4281876731e4e8bff7940514d72375154 ] The interrupt controller inside the Wii's Hollywood chip is connected to two masters, the "Broadway" PowerPC and the "Starlet" ARM926, each with their own interrupt status and mask registers. When booting the Wii with mini[1], interrupts from the SD card controller (IRQ 7) are handled by the ARM, because mini provides SD access over IPC. Linux however can't currently use or disable this IPC service, so both sides try to handle IRQ 7 without coordination. Let's instead make sure that all interrupts that are unmasked on the PPC side are masked on the ARM side; this will also make sure that Linux can properly talk to the SD card controller (and potentially other devices). If access to a device through IPC is desired in the future, interrupts from that device should not be handled by Linux directly. [1]: https://github.com/lewurm/mini Signed-off-by: Jonathan Neuschäfer Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 89c54de88b7a..bf4a125faec6 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -35,6 +35,8 @@ */ #define HW_BROADWAY_ICR 0x00 #define HW_BROADWAY_IMR 0x04 +#define HW_STARLET_ICR 0x08 +#define HW_STARLET_IMR 0x0c /* @@ -74,6 +76,9 @@ static void hlwd_pic_unmask(struct irq_data *d) void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); + + /* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */ + clrbits32(io_base + HW_STARLET_IMR, 1 << irq); } -- GitLab From d0eca5d32b11ef4d7c176a362c55e4ed53f13a73 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 9 May 2018 12:12:15 -0700 Subject: [PATCH 0498/1291] HID: i2c-hid: check if device is there before really probing [ Upstream commit b3a81b6c4fc6730ac49e20d789a93c0faabafc98 ] On many Chromebooks touch devices are multi-sourced; the components are electrically compatible and one can be freely swapped for another without changing the OS image or firmware. To avoid bunch of scary messages when device is not actually present in the system let's try testing basic communication with it and if there is no response terminate probe early with -ENXIO. Signed-off-by: Dmitry Torokhov Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index d92827556389..136a34dc31b8 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -1036,6 +1036,14 @@ static int i2c_hid_probe(struct i2c_client *client, pm_runtime_enable(&client->dev); device_enable_async_suspend(&client->dev); + /* Make sure there is something at this address */ + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_dbg(&client->dev, "nothing at this address: %d\n", ret); + ret = -ENXIO; + goto err_pm; + } + ret = i2c_hid_fetch_hid_descriptor(ihid); if (ret < 0) goto err_pm; -- GitLab From d7ed9da96a8a4cb47601e56f5632167b46679fbb Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 14 May 2018 12:04:01 -0500 Subject: [PATCH 0499/1291] EDAC, altera: Fix ARM64 build warning [ Upstream commit 9ef20753e044f7468c4113e5aecd785419b0b3cc ] The kbuild test robot reported the following warning: drivers/edac/altera_edac.c: In function 'ocram_free_mem': drivers/edac/altera_edac.c:1410:42: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] gen_pool_free((struct gen_pool *)other, (u32)p, size); ^ After adding support for ARM64 architectures, the unsigned long parameter is 64 bits and causes a build warning on 64-bit configs. Fix by casting to the correct size (unsigned long) instead of u32. Reported-by: kbuild test robot Signed-off-by: Thor Thayer Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Fixes: c3eea1942a16 ("EDAC, altera: Add Altera L2 cache and OCRAM support") Link: http://lkml.kernel.org/r/1526317441-4996-1-git-send-email-thor.thayer@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 346c4987b284..38983f56ad0d 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1106,7 +1106,7 @@ static void *ocram_alloc_mem(size_t size, void **other) static void ocram_free_mem(void *p, size_t size, void *other) { - gen_pool_free((struct gen_pool *)other, (u32)p, size); + gen_pool_free((struct gen_pool *)other, (unsigned long)p, size); } static const struct edac_device_prv_data ocramecc_data = { -- GitLab From b565e4e9dd09e55bfb0340e58d67c395dcee30c2 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 20 Apr 2018 17:41:31 +0200 Subject: [PATCH 0500/1291] ARM: dts: stih407-pinctrl: Fix complain about IRQ_TYPE_NONE usage [ Upstream commit e95b8e718f9bd2386a29639dd21c633b4951dc21 ] Since commit 83a86fbb5b56 ("irqchip/gic: Loudly complain about the use of IRQ_TYPE_NONE") kernel is complaining about the IRQ_TYPE_NONE usage which shouldn't be used. Use IRQ_TYPE_LEVEL_HIGH instead. Signed-off-by: Patrice Chotard Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/stih407-pinctrl.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/stih407-pinctrl.dtsi b/arch/arm/boot/dts/stih407-pinctrl.dtsi index bd1a82e8fffe..fe501d32d059 100644 --- a/arch/arm/boot/dts/stih407-pinctrl.dtsi +++ b/arch/arm/boot/dts/stih407-pinctrl.dtsi @@ -52,7 +52,7 @@ pin-controller-sbc { st,syscfg = <&syscfg_sbc>; reg = <0x0961f080 0x4>; reg-names = "irqmux"; - interrupts = ; + interrupts = ; interrupt-names = "irqmux"; ranges = <0 0x09610000 0x6000>; @@ -376,7 +376,7 @@ pin-controller-front0 { st,syscfg = <&syscfg_front>; reg = <0x0920f080 0x4>; reg-names = "irqmux"; - interrupts = ; + interrupts = ; interrupt-names = "irqmux"; ranges = <0 0x09200000 0x10000>; @@ -936,7 +936,7 @@ pin-controller-front1 { st,syscfg = <&syscfg_front>; reg = <0x0921f080 0x4>; reg-names = "irqmux"; - interrupts = ; + interrupts = ; interrupt-names = "irqmux"; ranges = <0 0x09210000 0x10000>; @@ -969,7 +969,7 @@ pin-controller-rear { st,syscfg = <&syscfg_rear>; reg = <0x0922f080 0x4>; reg-names = "irqmux"; - interrupts = ; + interrupts = ; interrupt-names = "irqmux"; ranges = <0 0x09220000 0x6000>; @@ -1164,7 +1164,7 @@ pin-controller-flash { st,syscfg = <&syscfg_flash>; reg = <0x0923f080 0x4>; reg-names = "irqmux"; - interrupts = ; + interrupts = ; interrupt-names = "irqmux"; ranges = <0 0x09230000 0x3000>; -- GitLab From 9c552c4e262fb4a640ada039b395b954960a066e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 May 2018 15:40:05 +0200 Subject: [PATCH 0501/1291] ARM: dts: emev2: Add missing interrupt-affinity to PMU node [ Upstream commit 7207b94754b6f503b278b5b200faaf662ffa1da8 ] The PMU node references two interrupts, but lacks the interrupt-affinity property, which is required in that case: hw perfevents: no interrupt-affinity property for /pmu, guessing. Add the missing property to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/emev2.dtsi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/emev2.dtsi b/arch/arm/boot/dts/emev2.dtsi index 42ea246e71cb..fec1241b858f 100644 --- a/arch/arm/boot/dts/emev2.dtsi +++ b/arch/arm/boot/dts/emev2.dtsi @@ -31,13 +31,13 @@ cpus { #address-cells = <1>; #size-cells = <0>; - cpu@0 { + cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0>; clock-frequency = <533000000>; }; - cpu@1 { + cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <1>; @@ -57,6 +57,7 @@ pmu { compatible = "arm,cortex-a9-pmu"; interrupts = , ; + interrupt-affinity = <&cpu0>, <&cpu1>; }; clocks@e0110000 { -- GitLab From 901366594729fdd10d1288487dd86011d6f135d0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 May 2018 15:40:04 +0200 Subject: [PATCH 0502/1291] ARM: dts: sh73a0: Add missing interrupt-affinity to PMU node [ Upstream commit 57a66497e1b7486609250a482f05935eae5035e9 ] The PMU node references two interrupts, but lacks the interrupt-affinity property, which is required in that case: hw perfevents: no interrupt-affinity property for /pmu, guessing. Add the missing property to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sh73a0.dtsi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi index 4ea5c5a16c57..5fc24d4c2d5d 100644 --- a/arch/arm/boot/dts/sh73a0.dtsi +++ b/arch/arm/boot/dts/sh73a0.dtsi @@ -22,7 +22,7 @@ cpus { #address-cells = <1>; #size-cells = <0>; - cpu@0 { + cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0>; @@ -30,7 +30,7 @@ cpu@0 { power-domains = <&pd_a2sl>; next-level-cache = <&L2>; }; - cpu@1 { + cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <1>; @@ -89,6 +89,7 @@ pmu { compatible = "arm,cortex-a9-pmu"; interrupts = , ; + interrupt-affinity = <&cpu0>, <&cpu1>; }; cmt1: timer@e6138000 { -- GitLab From 6fdc5235693b333f28a6581d049799156ba2807a Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 11 May 2018 12:07:03 +0100 Subject: [PATCH 0503/1291] nvmem: properly handle returned value nvmem_reg_read [ Upstream commit 50808bfcc14b854775a9f1d0abe3dac2babcf5c3 ] Function nvmem_reg_read can return a non zero value indicating an error. This returned value must be read and error propagated to nvmem_cell_prepare_write_buffer. Silence the following gcc warning (W=1): drivers/nvmem/core.c:1093:9: warning: variable 'rc' set but not used [-Wunused-but-set-variable] Signed-off-by: Mathieu Malaterre Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index d12e5de78e70..2afafd5d8915 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -1049,6 +1049,8 @@ static inline void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell, /* setup the first byte with lsb bits from nvmem */ rc = nvmem_reg_read(nvmem, cell->offset, &v, 1); + if (rc) + goto err; *b++ |= GENMASK(bit_offset - 1, 0) & v; /* setup rest of the byte if any */ @@ -1067,11 +1069,16 @@ static inline void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell, /* setup the last byte with msb bits from nvmem */ rc = nvmem_reg_read(nvmem, cell->offset + cell->bytes - 1, &v, 1); + if (rc) + goto err; *p |= GENMASK(7, (nbits + bit_offset) % BITS_PER_BYTE) & v; } return buf; +err: + kfree(buf); + return ERR_PTR(rc); } /** -- GitLab From 731b918a624c48dba4a48767668fac57f8a7a42e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 10 May 2018 05:59:48 -0700 Subject: [PATCH 0504/1291] i40e: free the skb after clearing the bitlock [ Upstream commit c79756cb5f084736b138da9319a02f7c72644548 ] In commit bbc4e7d273b5 ("i40e: fix race condition with PTP_TX_IN_PROGRESS bits") we modified the code which handles Tx timestamps so that we would clear the progress bit as soon as possible. A later commit 0bc0706b46cd ("i40e: check for Tx timestamp timeouts during watchdog") introduced similar code for detecting and handling cleanup of a blocked Tx timestamp. This code did not use the same pattern for cleaning up the skb. Update this code to wait to free the skb until after the bit lock is free, by first setting the ptp_tx_skb to NULL and clearing the lock. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index d8456c381c99..ef242dbae116 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -337,6 +337,8 @@ void i40e_ptp_rx_hang(struct i40e_pf *pf) **/ void i40e_ptp_tx_hang(struct i40e_pf *pf) { + struct sk_buff *skb; + if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx) return; @@ -349,9 +351,12 @@ void i40e_ptp_tx_hang(struct i40e_pf *pf) * within a second it is reasonable to assume that we never will. */ if (time_is_before_jiffies(pf->ptp_tx_start + HZ)) { - dev_kfree_skb_any(pf->ptp_tx_skb); + skb = pf->ptp_tx_skb; pf->ptp_tx_skb = NULL; clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state); + + /* Free the skb after we clear the bitlock */ + dev_kfree_skb_any(skb); pf->tx_hwtstamp_timeouts++; } } -- GitLab From deb1feaad03a78b545c949e54582ae57b3c56982 Mon Sep 17 00:00:00 2001 From: DaeRyong Jeong Date: Tue, 1 May 2018 00:27:04 +0900 Subject: [PATCH 0505/1291] tty: Fix data race in tty_insert_flip_string_fixed_flag [ Upstream commit b6da31b2c07c46f2dcad1d86caa835227a16d9ff ] Unlike normal serials, in pty layer, there is no guarantee that multiple threads don't insert input characters at the same time. If it is happened, tty_insert_flip_string_fixed_flag can be executed concurrently. This can lead slab out-of-bounds write in tty_insert_flip_string_fixed_flag. Call sequences are as follows. CPU0 CPU1 n_tty_ioctl_helper n_tty_ioctl_helper __start_tty tty_send_xchar tty_wakeup pty_write n_hdlc_tty_wakeup tty_insert_flip_string n_hdlc_send_frames tty_insert_flip_string_fixed_flag pty_write tty_insert_flip_string tty_insert_flip_string_fixed_flag To fix the race, acquire port->lock in pty_write() before it inserts input characters to tty buffer. It prevents multiple threads from inserting input characters concurrently. The crash log is as follows: BUG: KASAN: slab-out-of-bounds in tty_insert_flip_string_fixed_flag+0xb5/ 0x130 drivers/tty/tty_buffer.c:316 at addr ffff880114fcc121 Write of size 1792 by task syz-executor0/30017 CPU: 1 PID: 30017 Comm: syz-executor0 Not tainted 4.8.0 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 0000000000000000 ffff88011638f888 ffffffff81694cc3 ffff88007d802140 ffff880114fcb300 ffff880114fcc300 ffff880114fcb300 ffff88011638f8b0 ffffffff8130075c ffff88011638f940 ffff88007d802140 ffff880194fcc121 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0xb3/0x110 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 print_address_description mm/kasan/report.c:194 [inline] kasan_report_error+0x1f7/0x4e0 mm/kasan/report.c:283 kasan_report+0x36/0x40 mm/kasan/report.c:303 check_memory_region_inline mm/kasan/kasan.c:292 [inline] check_memory_region+0x13e/0x1a0 mm/kasan/kasan.c:299 memcpy+0x37/0x50 mm/kasan/kasan.c:335 tty_insert_flip_string_fixed_flag+0xb5/0x130 drivers/tty/tty_buffer.c:316 tty_insert_flip_string include/linux/tty_flip.h:35 [inline] pty_write+0x7f/0xc0 drivers/tty/pty.c:115 n_hdlc_send_frames+0x1d4/0x3b0 drivers/tty/n_hdlc.c:419 n_hdlc_tty_wakeup+0x73/0xa0 drivers/tty/n_hdlc.c:496 tty_wakeup+0x92/0xb0 drivers/tty/tty_io.c:601 __start_tty.part.26+0x66/0x70 drivers/tty/tty_io.c:1018 __start_tty+0x34/0x40 drivers/tty/tty_io.c:1013 n_tty_ioctl_helper+0x146/0x1e0 drivers/tty/tty_ioctl.c:1138 n_hdlc_tty_ioctl+0xb3/0x2b0 drivers/tty/n_hdlc.c:794 tty_ioctl+0xa85/0x16d0 drivers/tty/tty_io.c:2992 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x13e/0xba0 fs/ioctl.c:679 SYSC_ioctl fs/ioctl.c:694 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 entry_SYSCALL_64_fastpath+0x1f/0xbd Signed-off-by: DaeRyong Jeong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 64338442050e..899e8fe5e00f 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -110,16 +110,19 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; + unsigned long flags; if (tty->stopped) return 0; if (c > 0) { + spin_lock_irqsave(&to->port->lock, flags); /* Stuff the data into the input queue of the other end */ c = tty_insert_flip_string(to->port, buf, c); /* And shovel */ if (c) tty_flip_buffer_push(to->port); + spin_unlock_irqrestore(&to->port->lock, flags); } return c; } -- GitLab From fa57e8d54494f1a9b636dbd18d566217ef29423b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 May 2018 13:14:33 +0100 Subject: [PATCH 0506/1291] dma-iommu: Fix compilation when !CONFIG_IOMMU_DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8a22a3e1e768c309b718f99bd86f9f25a453e0dc ] Inclusion of include/dma-iommu.h when CONFIG_IOMMU_DMA is not selected results in the following splat: In file included from drivers/irqchip/irq-gic-v3-mbi.c:20:0: ./include/linux/dma-iommu.h:95:69: error: unknown type name ‘dma_addr_t’ static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) ^~~~~~~~~~ ./include/linux/dma-iommu.h:108:74: warning: ‘struct list_head’ declared inside parameter list will not be visible outside of this definition or declaration static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) ^~~~~~~~~ scripts/Makefile.build:312: recipe for target 'drivers/irqchip/irq-gic-v3-mbi.o' failed Fix it by including linux/types.h. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Rob Herring Cc: Jason Cooper Cc: Ard Biesheuvel Cc: Srinivas Kandagatla Cc: Thomas Petazzoni Cc: Miquel Raynal Link: https://lkml.kernel.org/r/20180508121438.11301-5-marc.zyngier@arm.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-iommu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 92f20832fd28..e8ca5e654277 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -17,6 +17,7 @@ #define __DMA_IOMMU_H #ifdef __KERNEL__ +#include #include #ifdef CONFIG_IOMMU_DMA -- GitLab From 240bc678f72629b9bf420d7dd75462fdcb70b2a8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 10 May 2018 13:17:30 -0700 Subject: [PATCH 0507/1291] net: phy: phylink: Release link GPIO [ Upstream commit daab3349ad1a69663ccad278ed71d55974d104b4 ] We are not releasing the link GPIO descriptor with gpiod_put() which results in subsequent probing to get -EBUSY when calling fwnode_get_named_gpiod(). Fix this by doing the release in phylink_destroy(). Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 1b2fe74a44ea..e4a6ed88b9cf 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -561,6 +561,8 @@ void phylink_destroy(struct phylink *pl) { if (pl->sfp_bus) sfp_unregister_upstream(pl->sfp_bus); + if (!IS_ERR(pl->link_gpio)) + gpiod_put(pl->link_gpio); cancel_work_sync(&pl->resolve); kfree(pl); -- GitLab From adf9ceabfc168c26c83e7160103aa752c136cc1b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Jul 2016 07:21:46 -0400 Subject: [PATCH 0508/1291] media: rcar_jpu: Add missing clk_disable_unprepare() on error in jpu_open() [ Upstream commit 43d0d3c52787df0221d1c52494daabd824fe84f1 ] Add the missing clk_disable_unprepare() before return from jpu_open() in the software reset error handling case. Signed-off-by: Wei Yongjun Acked-by: Mikhail Ulyanov Reviewed-by: Kieran Bingham Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/rcar_jpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c index 070bac36d766..2e2b8c409150 100644 --- a/drivers/media/platform/rcar_jpu.c +++ b/drivers/media/platform/rcar_jpu.c @@ -1280,7 +1280,7 @@ static int jpu_open(struct file *file) /* ...issue software reset */ ret = jpu_reset(jpu); if (ret) - goto device_prepare_rollback; + goto jpu_reset_rollback; } jpu->ref_count++; @@ -1288,6 +1288,8 @@ static int jpu_open(struct file *file) mutex_unlock(&jpu->mutex); return 0; +jpu_reset_rollback: + clk_disable_unprepare(jpu->clk); device_prepare_rollback: mutex_unlock(&jpu->mutex); v4l_prepare_rollback: -- GitLab From 885d2128ab7f7bf469c9441d2178081585003a74 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 May 2018 09:28:12 +0900 Subject: [PATCH 0509/1291] libata: Fix command retry decision [ Upstream commit 804689ad2d9b66d0d3920b48cf05881049d44589 ] For failed commands with valid sense data (e.g. NCQ commands), scsi_check_sense() is used in ata_analyze_tf() to determine if the command can be retried. In such case, rely on this decision and ignore the command error mask based decision done in ata_worth_retry(). This fixes useless retries of commands such as unaligned writes on zoned disks (TYPE_ZAC). Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 711dd91b5e2c..2651c81d1edf 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2217,12 +2217,16 @@ static void ata_eh_link_autopsy(struct ata_link *link) if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; - /* SENSE_VALID trumps dev/unknown error and revalidation */ + /* + * SENSE_VALID trumps dev/unknown error and revalidation. Upper + * layers will determine whether the command is worth retrying + * based on the sense data and device class/type. Otherwise, + * determine directly if the command is worth retrying using its + * error mask and flags. + */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); - - /* determine whether the command is worth retrying */ - if (ata_eh_worth_retry(qc)) + else if (ata_eh_worth_retry(qc)) qc->flags |= ATA_QCFLAG_RETRY; /* accumulate error info */ -- GitLab From ce222fb1256b4c8c712bc894e395cd7640fc54ad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Apr 2018 14:54:17 +0200 Subject: [PATCH 0510/1291] ACPI / LPSS: Only call pwm_add_table() for Bay Trail PWM if PMIC HRV is 2 [ Upstream commit c975e472ec12392a0c34de1350e634310f8a1dea ] The Point of View mobii wintab p800w Bay Trail tablet comes with a Crystal Cove PMIC, yet uses the LPSS PWM for backlight control, rather then the Crystal Cove's PWM, so we need to call pwm_add_table() to add a pwm_backlight mapping for the LPSS pwm despite there being an INT33FD ACPI device present. On all Bay Trail devices the _HRV object of the INT33FD ACPI device will normally return 2, to indicate the Bay Trail variant of the CRC PMIC is present, except on this tablet where _HRV is 0xffff. I guess this is a hack to make the windows Crystal Cove PWM driver not bind. Out of the 44 DSTDs with an INT33FD device in there which I have (from different model devices) only the pov mobii wintab p800w uses 0xffff for the HRV. The byt_pwm_setup code calls acpi_dev_present to check for the presence of a INT33FD ACPI device which indicates that a CRC PMIC is present and if the INT33FD ACPI device is present then byt_pwm_setup will not add a pwm_backlight mapping for the LPSS pwm, so that the CRC PWM will get used instead. acpi_dev_present has a hrv parameter, this commit make us pass 2 instead of -1, so that things still match on normal tablets, but on this special case with its _HRV of 0xffff, the check will now fail so that the pwm_backlight mapping for the LPSS pwm gets added fixing backlight brightness control on this device. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 602ae58ee2d8..75c3cb377b98 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -69,6 +69,10 @@ ACPI_MODULE_NAME("acpi_lpss"); #define LPSS_SAVE_CTX BIT(4) #define LPSS_NO_D3_DELAY BIT(5) +/* Crystal Cove PMIC shares same ACPI ID between different platforms */ +#define BYT_CRC_HRV 2 +#define CHT_CRC_HRV 3 + struct lpss_private_data; struct lpss_device_desc { @@ -162,7 +166,7 @@ static void byt_pwm_setup(struct lpss_private_data *pdata) if (!adev->pnp.unique_id || strcmp(adev->pnp.unique_id, "1")) return; - if (!acpi_dev_present("INT33FD", NULL, -1)) + if (!acpi_dev_present("INT33FD", NULL, BYT_CRC_HRV)) pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup)); } -- GitLab From f3766ad7d325be5dae629f16f6027e2acf0a01bc Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 7 May 2018 14:09:46 -0400 Subject: [PATCH 0511/1291] media: media-device: fix ioctl function types [ Upstream commit daa36370b62428cca6d48d1b2530a8419f631c8c ] This change fixes function types for media device ioctls to avoid indirect call mismatches with Control-Flow Integrity checking. Signed-off-by: Sami Tolvanen Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/media-device.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index e79f72b8b858..62b2c5d9bdfb 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -54,9 +54,10 @@ static int media_device_close(struct file *filp) return 0; } -static int media_device_get_info(struct media_device *dev, - struct media_device_info *info) +static long media_device_get_info(struct media_device *dev, void *arg) { + struct media_device_info *info = arg; + memset(info, 0, sizeof(*info)); if (dev->driver_name[0]) @@ -93,9 +94,9 @@ static struct media_entity *find_entity(struct media_device *mdev, u32 id) return NULL; } -static long media_device_enum_entities(struct media_device *mdev, - struct media_entity_desc *entd) +static long media_device_enum_entities(struct media_device *mdev, void *arg) { + struct media_entity_desc *entd = arg; struct media_entity *ent; ent = find_entity(mdev, entd->id); @@ -146,9 +147,9 @@ static void media_device_kpad_to_upad(const struct media_pad *kpad, upad->flags = kpad->flags; } -static long media_device_enum_links(struct media_device *mdev, - struct media_links_enum *links) +static long media_device_enum_links(struct media_device *mdev, void *arg) { + struct media_links_enum *links = arg; struct media_entity *entity; entity = find_entity(mdev, links->entity); @@ -194,9 +195,9 @@ static long media_device_enum_links(struct media_device *mdev, return 0; } -static long media_device_setup_link(struct media_device *mdev, - struct media_link_desc *linkd) +static long media_device_setup_link(struct media_device *mdev, void *arg) { + struct media_link_desc *linkd = arg; struct media_link *link = NULL; struct media_entity *source; struct media_entity *sink; @@ -222,9 +223,9 @@ static long media_device_setup_link(struct media_device *mdev, return __media_entity_setup_link(link, linkd->flags); } -static long media_device_get_topology(struct media_device *mdev, - struct media_v2_topology *topo) +static long media_device_get_topology(struct media_device *mdev, void *arg) { + struct media_v2_topology *topo = arg; struct media_entity *entity; struct media_interface *intf; struct media_pad *pad; -- GitLab From d49f8498042a5b7f99fff05a7c2adbf3135906d1 Mon Sep 17 00:00:00 2001 From: Brad Love Date: Fri, 4 May 2018 17:53:35 -0400 Subject: [PATCH 0512/1291] media: saa7164: Fix driver name in debug output [ Upstream commit 0cc4655cb57af0b7e105d075c4f83f8046efafe7 ] This issue was reported by a user who downloaded a corrupt saa7164 firmware, then went looking for a valid xc5000 firmware to fix the error displayed...but the device in question has no xc5000, thus after much effort, the wild goose chase eventually led to a support call. The xc5000 has nothing to do with saa7164 (as far as I can tell), so replace the string with saa7164 as well as give a meaningful hint on the firmware mismatch. Signed-off-by: Brad Love Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7164/saa7164-fw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/saa7164/saa7164-fw.c b/drivers/media/pci/saa7164/saa7164-fw.c index ef4906406ebf..a50461861133 100644 --- a/drivers/media/pci/saa7164/saa7164-fw.c +++ b/drivers/media/pci/saa7164/saa7164-fw.c @@ -426,7 +426,8 @@ int saa7164_downloadfirmware(struct saa7164_dev *dev) __func__, fw->size); if (fw->size != fwlength) { - printk(KERN_ERR "xc5000: firmware incorrect size\n"); + printk(KERN_ERR "saa7164: firmware incorrect size %zu != %u\n", + fw->size, fwlength); ret = -ENOMEM; goto out; } -- GitLab From 59d9b120600d981c2cf4b31a34334f75480cb7a3 Mon Sep 17 00:00:00 2001 From: Jane Wan Date: Tue, 8 May 2018 14:19:53 -0700 Subject: [PATCH 0513/1291] mtd: rawnand: fsl_ifc: fix FSL NAND driver to read all ONFI parameter pages [ Upstream commit a75bbe71a27875fdc61cde1af6d799037cef6bed ] Per ONFI specification (Rev. 4.0), if the CRC of the first parameter page read is not valid, the host should read redundant parameter page copies. Fix FSL NAND driver to read the two redundant copies which are mandatory in the specification. Signed-off-by: Jane Wan Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 4005b427023c..16deba1a2385 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -342,9 +342,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, case NAND_CMD_READID: case NAND_CMD_PARAM: { + /* + * For READID, read 8 bytes that are currently used. + * For PARAM, read all 3 copies of 256-bytes pages. + */ + int len = 8; int timing = IFC_FIR_OP_RB; - if (command == NAND_CMD_PARAM) + if (command == NAND_CMD_PARAM) { timing = IFC_FIR_OP_RBCD; + len = 256 * 3; + } ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | @@ -354,12 +361,8 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, &ifc->ifc_nand.nand_fcr0); ifc_out32(column, &ifc->ifc_nand.row3); - /* - * although currently it's 8 bytes for READID, we always read - * the maximum 256 bytes(for PARAM) - */ - ifc_out32(256, &ifc->ifc_nand.nand_fbcr); - ifc_nand_ctrl->read_bytes = 256; + ifc_out32(len, &ifc->ifc_nand.nand_fbcr); + ifc_nand_ctrl->read_bytes = len; set_addr(mtd, 0, 0, 0); fsl_ifc_run_command(mtd); -- GitLab From af5e8846a5e9a34f61b672954db0810b74b79268 Mon Sep 17 00:00:00 2001 From: Sean Lanigan Date: Fri, 4 May 2018 16:48:23 +1000 Subject: [PATCH 0514/1291] brcmfmac: Add support for bcm43364 wireless chipset [ Upstream commit 9c4a121e82634aa000a702c98cd6f05b27d6e186 ] Add support for the BCM43364 chipset via an SDIO interface, as used in e.g. the Murata 1FX module. The BCM43364 uses the same firmware as the BCM43430 (which is already included), the only difference is the omission of Bluetooth. However, the SDIO_ID for the BCM43364 is 02D0:A9A4, giving it a MODALIAS of sdio:c00v02D0dA9A4, which doesn't get recognised and hence doesn't load the brcmfmac module. Adding the 'A9A4' ID in the appropriate place triggers the brcmfmac driver to load, and then correctly use the firmware file 'brcmfmac43430-sdio.bin'. Signed-off-by: Sean Lanigan Acked-by: Ulf Hansson Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 1 + include/linux/mmc/sdio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index cd587325e286..dd6e27513cc1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1098,6 +1098,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index cdd66a5fbd5e..0a7abe8a407f 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -35,6 +35,7 @@ #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_4339 0x4339 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf -- GitLab From d5d8223d7c67fb005407e0e569ea8cd5b9f4b4fa Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 8 May 2018 10:18:39 +0200 Subject: [PATCH 0515/1291] s390/cpum_sf: Add data entry sizes to sampling trailer entry [ Upstream commit 77715b7ddb446bd39a06f3376e85f4bb95b29bb8 ] The CPU Measurement sampling facility creates a trailer entry for each Sample-Data-Block of stored samples. The trailer entry contains the sizes (in bytes) of the stored sampling types: - basic-sampling data entry size - diagnostic-sampling data entry size Both sizes are 2 bytes long. This patch changes the trailer entry definition to reflect this. Fixes: fcc77f507333 ("s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 05480e4cc5ca..073837f204ee 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -132,7 +132,9 @@ struct hws_trailer_entry { unsigned int f:1; /* 0 - Block Full Indicator */ unsigned int a:1; /* 1 - Alert request control */ unsigned int t:1; /* 2 - Timestamp format */ - unsigned long long:61; /* 3 - 63: Reserved */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ }; unsigned long long flags; /* 0 - 63: All indicators */ }; -- GitLab From c9b5d1519c2400281b2c7dd121100725a8d6c11f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 8 May 2018 07:53:39 +0200 Subject: [PATCH 0516/1291] perf: fix invalid bit in diagnostic entry [ Upstream commit 3c0a83b14ea71fef5ccc93a3bd2de5f892be3194 ] The s390 CPU measurement facility sampling mode supports basic entries and diagnostic entries. Each entry has a valid bit to indicate the status of the entry as valid or invalid. This bit is bit 31 in the diagnostic entry, but the bit mask definition refers to bit 30. Fix this by making the reserved field one bit larger. Fixes: 7e75fc3ff4cf ("s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 073837f204ee..bc764a674594 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -116,7 +116,7 @@ struct hws_basic_entry { struct hws_diag_entry { unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int R:15; /* 16-19 and 20-30 reserved */ unsigned int I:1; /* 31 entry valid or invalid */ u8 data[]; /* Machine-dependent sample data */ } __packed; -- GitLab From 112f47373e364dd508cd9515f626b3fc83fddb10 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 8 May 2018 03:18:39 -0400 Subject: [PATCH 0517/1291] bnxt_en: Check unsupported speeds in bnxt_update_link() on PF only. [ Upstream commit dac0490718bd17df5e3995ffca14255e5f9ed22d ] Only non-NPAR PFs need to actively check and manage unsupported link speeds. NPAR functions and VFs do not control the link speed and should skip the unsupported speed detection logic, to avoid warning messages from firmware rejecting the unsupported firmware calls. Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bfd2d0382f4c..94931318587c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5927,6 +5927,9 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) } mutex_unlock(&bp->hwrm_cmd_lock); + if (!BNXT_SINGLE_PF(bp)) + return 0; + diff = link_info->support_auto_speeds ^ link_info->advertising; if ((link_info->support_auto_speeds | diff) != link_info->support_auto_speeds) { -- GitLab From a0e86c016bb4e8c59e57a99bf53762ad313593c5 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 7 May 2018 19:46:43 -0500 Subject: [PATCH 0518/1291] scsi: 3w-9xxx: fix a missing-check bug [ Upstream commit c9318a3e0218bc9dacc25be46b9eec363259536f ] In twa_chrdev_ioctl(), the ioctl driver command is firstly copied from the userspace pointer 'argp' and saved to the kernel object 'driver_command'. Then a security check is performed on the data buffer size indicated by 'driver_command', which is 'driver_command.buffer_length'. If the security check is passed, the entire ioctl command is copied again from the 'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various operations are performed on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer resides in userspace, a malicious userspace process can race to change the buffer size between the two copies. This way, the user can bypass the security check and inject invalid data buffer size. This can cause potential security issues in the following execution. This patch checks for capable(CAP_SYS_ADMIN) in twa_chrdev_open()t o avoid the above issues. Signed-off-by: Wenwen Wang Acked-by: Adam Radford Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 00e7968a1d70..a1388842e17e 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -886,6 +886,11 @@ static int twa_chrdev_open(struct inode *inode, struct file *file) unsigned int minor_number; int retval = TW_IOCTL_ERROR_OS_ENODEV; + if (!capable(CAP_SYS_ADMIN)) { + retval = -EACCES; + goto out; + } + minor_number = iminor(inode); if (minor_number >= twa_device_extension_count) goto out; -- GitLab From ca588ff3e7d649690bb80a1cca4ed3dd591d9cd7 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 7 May 2018 19:54:01 -0500 Subject: [PATCH 0519/1291] scsi: 3w-xxxx: fix a missing-check bug [ Upstream commit 9899e4d3523faaef17c67141aa80ff2088f17871 ] In tw_chrdev_ioctl(), the length of the data buffer is firstly copied from the userspace pointer 'argp' and saved to the kernel object 'data_buffer_length'. Then a security check is performed on it to make sure that the length is not more than 'TW_MAX_IOCTL_SECTORS * 512'. Otherwise, an error code -EINVAL is returned. If the security check is passed, the entire ioctl command is copied again from the 'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various operations are performed on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer resides in userspace, a malicious userspace process can race to change the buffer length between the two copies. This way, the user can bypass the security check and inject invalid data buffer length. This can cause potential security issues in the following execution. This patch checks for capable(CAP_SYS_ADMIN) in tw_chrdev_open() to avoid the above issues. Signed-off-by: Wenwen Wang Acked-by: Adam Radford Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-xxxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 33261b690774..f6179e3d6953 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1033,6 +1033,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file) dprintk(KERN_WARNING "3w-xxxx: tw_ioctl_open()\n"); + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + minor_number = iminor(inode); if (minor_number >= tw_device_extension_count) return -ENODEV; -- GitLab From 13e4e358b94c2c07257a6b50170af93681ae10eb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 May 2018 13:54:32 +0300 Subject: [PATCH 0520/1291] scsi: megaraid: silence a static checker bug [ Upstream commit 27e833dabab74ee665e487e291c9afc6d71effba ] If we had more than 32 megaraid cards then it would cause memory corruption. That's not likely, of course, but it's handy to enforce it and make the static checker happy. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 7195cff51d4c..9b6f5d024dba 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4199,6 +4199,9 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) int irq, i, j; int error = -ENODEV; + if (hba_count >= MAX_CONTROLLERS) + goto out; + if (pci_enable_device(pdev)) goto out; pci_set_master(pdev); -- GitLab From ad1562ae81357df085117c3ee61513ec5836c98b Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Wed, 2 May 2018 23:56:31 +0800 Subject: [PATCH 0521/1291] scsi: hisi_sas: config ATA de-reset as an constrained command for v3 hw [ Upstream commit 9413532788df7470297dd0475995c5dc5b07f362 ] As a unconstrained command, a command can be sent to SATA disk even if SATA disk status is BUSY, ERR or DRQ. If an ATA reset assert is successful but ATA reset de-assert fails, then it will retry the reset de-assert. If reset de- assert retry is successful, we think it is okay to probe the device but actually it still has Err status. Apparently we need to retry the ATA reset assertion and de- assertion instead for this mentioned scenario. As such, we config ATA reset assert as a constrained command, if ATA reset de-assert fails, then ATA reset de-assert retry will also fail. Then we will retry the proper process of ATA reset assert and de-assert again. Signed-off-by: Xiang Chen Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 2e5fa9717be8..871962b2e2f6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -328,10 +328,11 @@ enum { #define DIR_TO_DEVICE 2 #define DIR_RESERVED 3 -#define CMD_IS_UNCONSTRAINT(cmd) \ - ((cmd == ATA_CMD_READ_LOG_EXT) || \ - (cmd == ATA_CMD_READ_LOG_DMA_EXT) || \ - (cmd == ATA_CMD_DEV_RESET)) +#define FIS_CMD_IS_UNCONSTRAINED(fis) \ + ((fis.command == ATA_CMD_READ_LOG_EXT) || \ + (fis.command == ATA_CMD_READ_LOG_DMA_EXT) || \ + ((fis.command == ATA_CMD_DEV_RESET) && \ + ((fis.control & ATA_SRST) != 0))) static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { @@ -1044,7 +1045,7 @@ static int prep_ata_v3_hw(struct hisi_hba *hisi_hba, << CMD_HDR_FRAME_TYPE_OFF; dw1 |= sas_dev->device_id << CMD_HDR_DEV_ID_OFF; - if (CMD_IS_UNCONSTRAINT(task->ata_task.fis.command)) + if (FIS_CMD_IS_UNCONSTRAINED(task->ata_task.fis)) dw1 |= 1 << CMD_HDR_UNCON_CMD_OFF; hdr->dw1 = cpu_to_le32(dw1); -- GitLab From de3da42dc775f991dd4aa94b4407c06d59be7395 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:09:02 -0700 Subject: [PATCH 0522/1291] scsi: qedf: Set the UNLOADING flag when removing a vport [ Upstream commit 4f4616ceebaf045c59e8a6aa01f08826d18d5c63 ] Similar to what we do when we remove a PCI function, set the QEDF_UNLOADING flag to prevent any requests from being queued while a vport is being deleted. This prevents any requests from getting stuck in limbo when the vport is unloaded or deleted. Fixes the crash: PID: 106676 TASK: ffff9a436aa90000 CPU: 12 COMMAND: "multipathd" #0 [ffff9a43567d3550] machine_kexec+522 at ffffffffaca60b2a #1 [ffff9a43567d35b0] __crash_kexec+114 at ffffffffacb13512 #2 [ffff9a43567d3680] crash_kexec+48 at ffffffffacb13600 #3 [ffff9a43567d3698] oops_end+168 at ffffffffad117768 #4 [ffff9a43567d36c0] no_context+645 at ffffffffad106f52 #5 [ffff9a43567d3710] __bad_area_nosemaphore+116 at ffffffffad106fe9 #6 [ffff9a43567d3760] bad_area+70 at ffffffffad107379 #7 [ffff9a43567d3788] __do_page_fault+1247 at ffffffffad11a8cf #8 [ffff9a43567d37f0] do_page_fault+53 at ffffffffad11a915 #9 [ffff9a43567d3820] page_fault+40 at ffffffffad116768 [exception RIP: qedf_init_task+61] RIP: ffffffffc0e13c2d RSP: ffff9a43567d38d0 RFLAGS: 00010046 RAX: 0000000000000000 RBX: ffffbe920472c738 RCX: ffff9a434fa0e3e8 RDX: ffff9a434f695280 RSI: ffffbe920472c738 RDI: ffff9a43aa359c80 RBP: ffff9a43567d3950 R8: 0000000000000c15 R9: ffff9a3fb09b9880 R10: ffff9a434fa0e3e8 R11: ffff9a43567d35ce R12: 0000000000000000 R13: ffff9a434f695280 R14: ffff9a43aa359c80 R15: ffff9a3fb9e005c0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qedf/qedf_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7c0064500cc5..382edb79a0de 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1649,6 +1649,15 @@ static int qedf_vport_destroy(struct fc_vport *vport) struct Scsi_Host *shost = vport_to_shost(vport); struct fc_lport *n_port = shost_priv(shost); struct fc_lport *vn_port = vport->dd_data; + struct qedf_ctx *qedf = lport_priv(vn_port); + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + goto out; + } + + /* Set unloading bit on vport qedf_ctx to prevent more I/O */ + set_bit(QEDF_UNLOADING, &qedf->flags); mutex_lock(&n_port->lp_mutex); list_del(&vn_port->list); @@ -1675,6 +1684,7 @@ static int qedf_vport_destroy(struct fc_vport *vport) if (vn_port->host) scsi_host_put(vn_port->host); +out: return 0; } -- GitLab From 52a21fcafa717002402c0e28c9e466950710b96d Mon Sep 17 00:00:00 2001 From: Doug Oucahrek Date: Tue, 1 May 2018 22:22:19 -0700 Subject: [PATCH 0523/1291] staging: lustre: o2iblnd: fix race at kiblnd_connect_peer [ Upstream commit cf04968efe341b9b1c30a527e5dd61b2af9c43d2 ] cmid will be destroyed at OFED if kiblnd_cm_callback return error. if error happen before the end of kiblnd_connect_peer, it will touch destroyed cmid and fail as (o2iblnd_cb.c:1315:kiblnd_connect_peer()) ASSERTION( cmid->device != ((void *)0) ) failed: Signed-off-by: Alexander Boyko Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10015 Reviewed-by: Alexey Lyashkov Reviewed-by: Doug Oucharek Reviewed-by: John L. Hammond Signed-off-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 29e10021b906..4a9b73305e4e 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1289,11 +1289,6 @@ kiblnd_connect_peer(struct kib_peer *peer) goto failed2; } - LASSERT(cmid->device); - CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n", - libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname, - &dev->ibd_ifip, cmid->device->name); - return; failed2: @@ -2995,8 +2990,19 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) } else { rc = rdma_resolve_route( cmid, *kiblnd_tunables.kib_timeout * 1000); - if (!rc) + if (!rc) { + struct kib_net *net = peer->ibp_ni->ni_data; + struct kib_dev *dev = net->ibn_dev; + + CDEBUG(D_NET, "%s: connection bound to "\ + "%s:%pI4h:%s\n", + libcfs_nid2str(peer->ibp_nid), + dev->ibd_ifname, + &dev->ibd_ifip, cmid->device->name); + return 0; + } + /* Can't initiate route resolution */ CERROR("Can't resolve route for %s: %d\n", libcfs_nid2str(peer->ibp_nid), rc); -- GitLab From 66eb9942673a55eed1d7267c04da389d5cdb64f9 Mon Sep 17 00:00:00 2001 From: Doug Oucharek Date: Tue, 1 May 2018 23:49:18 -0400 Subject: [PATCH 0524/1291] staging: lustre: o2iblnd: Fix FastReg map/unmap for MLX5 [ Upstream commit 24d4b7c8de007cff9c7d83c06ae76099fdcce008 ] The FastReg support in ko2iblnd was not unmapping pool items causing the items to leak. In addition, the mapping code is not growing the pool like we do with FMR. This patch makes sure we are unmapping FastReg pool elements when we are done with them. It also makes sure the pool will grow when we depleat the pool. Signed-off-by: Doug Oucharek Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9472 Reviewed-on: https://review.whamcloud.com/27015 Reviewed-by: Andrew Perepechko Reviewed-by: Dmitry Eremin Reviewed-by: James Simmons Reviewed-by: Oleg Drokin Signed-off-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 284cdd44a2ee..8b92cf06d063 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -1710,7 +1710,7 @@ int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, return 0; } spin_unlock(&fps->fps_lock); - rc = -EBUSY; + rc = -EAGAIN; } spin_lock(&fps->fps_lock); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 4a9b73305e4e..4b4a20149894 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -47,7 +47,7 @@ static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, __u64 dstcookie); static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn); static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn); -static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx); +static void kiblnd_unmap_tx(struct kib_tx *tx); static void kiblnd_check_sends_locked(struct kib_conn *conn); static void @@ -65,7 +65,7 @@ kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx) LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */ LASSERT(tx->tx_pool); - kiblnd_unmap_tx(ni, tx); + kiblnd_unmap_tx(tx); /* tx may have up to 2 lnet msgs to finalise */ lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; @@ -590,13 +590,9 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc * return 0; } -static void kiblnd_unmap_tx(struct lnet_ni *ni, struct kib_tx *tx) +static void kiblnd_unmap_tx(struct kib_tx *tx) { - struct kib_net *net = ni->ni_data; - - LASSERT(net); - - if (net->ibn_fmr_ps) + if (tx->fmr.fmr_pfmr || tx->fmr.fmr_frd) kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status); if (tx->tx_nfrags) { -- GitLab From b62ed0bbbd63bfec2a28c26d9c9a2d264b3c5ed8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Apr 2018 13:51:16 +0200 Subject: [PATCH 0525/1291] thermal: exynos: fix setting rising_threshold for Exynos5433 [ Upstream commit 8bfc218d0ebbabcba8ed2b8ec1831e0cf1f71629 ] Add missing clearing of the previous value when setting rising temperature threshold. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/samsung/exynos_tmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ac83f721db24..d60069b5dc98 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -598,6 +598,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev) threshold_code = temp_to_code(data, temp); rising_threshold = readl(data->base + rising_reg_offset); + rising_threshold &= ~(0xff << j * 8); rising_threshold |= (threshold_code << j * 8); writel(rising_threshold, data->base + rising_reg_offset); -- GitLab From 15239633dc559c22aea80b1627176d6bc181b2f7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 May 2018 18:37:17 -0700 Subject: [PATCH 0526/1291] bpf: fix references to free_bpf_prog_info() in comments [ Upstream commit ab7f5bf0928be2f148d000a6eaa6c0a36e74750e ] Comments in the verifier refer to free_bpf_prog_info() which seems to have never existed in tree. Replace it with free_used_maps(). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3ceb269c0ebd..450e2cd31ed6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4110,7 +4110,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded - * and all maps are released in free_bpf_prog_info() + * and all maps are released in free_used_maps() */ map = bpf_map_inc(map, false); if (IS_ERR(map)) { @@ -4623,7 +4623,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) vfree(log_buf); if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release - * them now. Otherwise free_bpf_prog_info() will release them. + * them now. Otherwise free_used_maps() will release them. */ release_maps(env); *prog = env->prog; -- GitLab From 4bbf1ce3a1e3efb6c746e0dbf5a5acb889b3b38b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 May 2018 23:26:02 -0700 Subject: [PATCH 0527/1291] f2fs: avoid fsync() failure caused by EAGAIN in writepage() [ Upstream commit 5b19d284f5195a925dd015a6397bfce184097378 ] pageout() in MM traslates EAGAIN, so calls handle_write_error() -> mapping_set_error() -> set_bit(AS_EIO, ...). file_write_and_wait_range() will see EIO error, which is critical to return value of fsync() followed by atomic_write failure to user. Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 36b535207c88..85142e5df88b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1601,7 +1601,13 @@ static int __write_data_page(struct page *page, bool *submitted, redirty_out: redirty_page_for_writepage(wbc, page); - if (!err) + /* + * pageout() in MM traslates EAGAIN, so calls handle_write_error() + * -> mapping_set_error() -> set_bit(AS_EIO, ...). + * file_write_and_wait_range() will see EIO error, which is critical + * to return value of fsync() followed by atomic_write failure to user. + */ + if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; unlock_page(page); return err; -- GitLab From cf006139691098ddfcbf70be07bf2a6264df27e1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 20 Apr 2018 08:32:16 -0400 Subject: [PATCH 0528/1291] media: siano: get rid of __le32/__le16 cast warnings [ Upstream commit e1b7f11b37def5f3021c06e8c2b4953e099357aa ] Those are all false-positives that appear with smatch when building for arm: drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 Get rid of them by adding explicit forced casts. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/siano/smsendian.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/media/common/siano/smsendian.c b/drivers/media/common/siano/smsendian.c index bfe831c10b1c..b95a631f23f9 100644 --- a/drivers/media/common/siano/smsendian.c +++ b/drivers/media/common/siano/smsendian.c @@ -35,7 +35,7 @@ void smsendian_handle_tx_message(void *buffer) switch (msg->x_msg_header.msg_type) { case MSG_SMS_DATA_DOWNLOAD_REQ: { - msg->msg_data[0] = le32_to_cpu(msg->msg_data[0]); + msg->msg_data[0] = le32_to_cpu((__force __le32)(msg->msg_data[0])); break; } @@ -44,7 +44,7 @@ void smsendian_handle_tx_message(void *buffer) sizeof(struct sms_msg_hdr))/4; for (i = 0; i < msg_words; i++) - msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]); + msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]); break; } @@ -64,7 +64,7 @@ void smsendian_handle_rx_message(void *buffer) { struct sms_version_res *ver = (struct sms_version_res *) msg; - ver->chip_model = le16_to_cpu(ver->chip_model); + ver->chip_model = le16_to_cpu((__force __le16)ver->chip_model); break; } @@ -81,7 +81,7 @@ void smsendian_handle_rx_message(void *buffer) sizeof(struct sms_msg_hdr))/4; for (i = 0; i < msg_words; i++) - msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]); + msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]); break; } @@ -95,9 +95,9 @@ void smsendian_handle_message_header(void *msg) #ifdef __BIG_ENDIAN struct sms_msg_hdr *phdr = (struct sms_msg_hdr *)msg; - phdr->msg_type = le16_to_cpu(phdr->msg_type); - phdr->msg_length = le16_to_cpu(phdr->msg_length); - phdr->msg_flags = le16_to_cpu(phdr->msg_flags); + phdr->msg_type = le16_to_cpu((__force __le16)phdr->msg_type); + phdr->msg_length = le16_to_cpu((__force __le16)phdr->msg_length); + phdr->msg_flags = le16_to_cpu((__force __le16)phdr->msg_flags); #endif /* __BIG_ENDIAN */ } EXPORT_SYMBOL_GPL(smsendian_handle_message_header); -- GitLab From c6f9830cfb475d71b37f84657091012eee7bfb9d Mon Sep 17 00:00:00 2001 From: Satendra Singh Thakur Date: Thu, 3 May 2018 11:19:32 +0530 Subject: [PATCH 0529/1291] drm/atomic: Handling the case when setting old crtc for plane [ Upstream commit fc2a69f3903dfd97cd47f593e642b47918c949df ] In the func drm_atomic_set_crtc_for_plane, with the current code, if crtc of the plane_state and crtc passed as argument to the func are same, entire func will executed in vein. It will get state of crtc and clear and set the bits in plane_mask. All these steps are not required for same old crtc. Ideally, we should do nothing in this case, this patch handles the same, and causes the program to return without doing anything in such scenario. Signed-off-by: Satendra Singh Thakur Cc: Madhur Verma Cc: Hemanshu Srivastava Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1525326572-25854-1-git-send-email-satendra.t@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 0d8a417e2cd6..bb5cc15fa0b9 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1355,7 +1355,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); -- GitLab From ab76f866c8f0936b72813659f70a16cd827ac683 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 2 May 2018 22:48:16 +0900 Subject: [PATCH 0530/1291] ALSA: hda/ca0132: fix build failure when a local macro is defined [ Upstream commit 8e142e9e628975b0dddd05cf1b095331dff6e2de ] DECLARE_TLV_DB_SCALE (alias of SNDRV_CTL_TLVD_DECLARE_DB_SCALE) is used but tlv.h is not included. This causes build failure when local macro is defined by comment-out. This commit fixes the bug. At the same time, the alias macro is replaced with a destination macro added at a commit 46e860f76804 ("ALSA: rename TLV-related macros so that they're friendly to user applications") Reported-by: Connor McAdams Fixes: 44f0c9782cc6 ('ALSA: hda/ca0132: Add tuning controls') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 3e73d5c6ccfc..119f3b504765 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -38,6 +38,10 @@ /* Enable this to see controls for tuning purpose. */ /*#define ENABLE_TUNING_CONTROLS*/ +#ifdef ENABLE_TUNING_CONTROLS +#include +#endif + #define FLOAT_ZERO 0x00000000 #define FLOAT_ONE 0x3f800000 #define FLOAT_TWO 0x40000000 @@ -3067,8 +3071,8 @@ static int equalizer_ctl_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(voice_focus_db_scale, 2000, 100, 0); -static const DECLARE_TLV_DB_SCALE(eq_db_scale, -2400, 100, 0); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(voice_focus_db_scale, 2000, 100, 0); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(eq_db_scale, -2400, 100, 0); static int add_tuning_control(struct hda_codec *codec, hda_nid_t pnid, hda_nid_t nid, -- GitLab From f0b0debbb49dc14b2deddfd226b22ad7c5496828 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 26 Mar 2018 17:26:25 +0800 Subject: [PATCH 0531/1291] mmc: dw_mmc: update actual clock for mmc debugfs [ Upstream commit ff178981bd5fd1667f373098740cb1c6d6efa1ba ] Respect the actual clock for mmc debugfs to help better debug the hardware. mmc_host mmc0: Bus speed (slot 0) = 135475200Hz (slot req 150000000Hz, actual 135475200HZ div = 0) cat /sys/kernel/debug/mmc0/ios clock: 150000000 Hz actual clock: 135475200 Hz vdd: 21 (3.3 ~ 3.4 V) bus mode: 2 (push-pull) chip select: 0 (don't care) power mode: 2 (on) bus width: 3 (8 bits) timing spec: 9 (mmc HS200) signal voltage: 0 (1.80 V) driver type: 0 (driver type B) Cc: Xiao Yao Cc: Ziyuan Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 6a2cbbba29aa..5252885e5cda 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1255,6 +1255,8 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) if (host->state == STATE_WAITING_CMD11_DONE) sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH; + slot->mmc->actual_clock = 0; + if (!clock) { mci_writel(host, CLKENA, 0); mci_send_cmd(slot, sdmmc_cmd_bits, 0); @@ -1313,6 +1315,8 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) /* keep the last clock value that was requested from core */ slot->__clk_old = clock; + slot->mmc->actual_clock = div ? ((host->bus_hz / div) >> 1) : + host->bus_hz; } host->current_speed = clock; -- GitLab From d6ce4f1bf79a007a8c391c93b58eef15b2fffb07 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 26 Mar 2018 17:33:14 +1100 Subject: [PATCH 0532/1291] mmc: pwrseq: Use kmalloc_array instead of stack VLA [ Upstream commit 486e6661367b40f927aadbed73237693396cbf94 ] The use of stack Variable Length Arrays needs to be avoided, as they can be a vector for stack exhaustion, which can be both a runtime bug (kernel Oops) or a security flaw (overwriting memory beyond the stack). Also, in general, as code evolves it is easy to lose track of how big a VLA can get. Thus, we can end up having runtime failures that are hard to debug. As part of the directive[1] to remove all VLAs from the kernel, and build with -Wvla. Currently driver is using a VLA declared using the number of descriptors. This array is used to store integer values and is later used as an argument to `gpiod_set_array_value_cansleep()` This can be avoided by using `kmalloc_array()` to allocate memory for the array of integer values. Memory is free'd before return from function. >From the code it appears that it is safe to sleep so we can use GFP_KERNEL (based _cansleep() suffix of function `gpiod_set_array_value_cansleep()`. It can be expected that this patch will result in a small increase in overhead due to the use of `kmalloc_array()` [1] https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Tobin C. Harding Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/pwrseq_simple.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c index 13ef162cf066..a8b9fee4d62a 100644 --- a/drivers/mmc/core/pwrseq_simple.c +++ b/drivers/mmc/core/pwrseq_simple.c @@ -40,14 +40,18 @@ static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq, struct gpio_descs *reset_gpios = pwrseq->reset_gpios; if (!IS_ERR(reset_gpios)) { - int i; - int values[reset_gpios->ndescs]; + int i, *values; + int nvalues = reset_gpios->ndescs; - for (i = 0; i < reset_gpios->ndescs; i++) + values = kmalloc_array(nvalues, sizeof(int), GFP_KERNEL); + if (!values) + return; + + for (i = 0; i < nvalues; i++) values[i] = value; - gpiod_set_array_value_cansleep( - reset_gpios->ndescs, reset_gpios->desc, values); + gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc, values); + kfree(values); } } -- GitLab From c9e5888ec8ae248d30b499a0add51047ef135b03 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Apr 2018 12:53:28 +0200 Subject: [PATCH 0533/1291] dt-bindings: pinctrl: meson: add support for the Meson8m2 SoC [ Upstream commit 03d9fbc39730b3e6b2e7047dc85f0f70de8fb97d ] The Meson8m2 SoC is a variant of Meson8 with some updates from Meson8b (such as the Gigabit capable DesignWare MAC). It is mostly pin compatible with Meson8, only 10 (existing) CBUS pins get an additional function (four of these are Ethernet RXD2, RXD3, TXD2 and TXD3 which are required when the board uses an RGMII PHY). The AOBUS pins seem to be identical on Meson8 and Meson8m2. Signed-off-by: Martin Blumenstingl Reviewed-by: Rob Herring Reviewed-by: Kevin Hilman Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt index 2392557ede27..df77d394edc0 100644 --- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt @@ -3,8 +3,10 @@ Required properties for the root node: - compatible: one of "amlogic,meson8-cbus-pinctrl" "amlogic,meson8b-cbus-pinctrl" + "amlogic,meson8m2-cbus-pinctrl" "amlogic,meson8-aobus-pinctrl" "amlogic,meson8b-aobus-pinctrl" + "amlogic,meson8m2-aobus-pinctrl" "amlogic,meson-gxbb-periphs-pinctrl" "amlogic,meson-gxbb-aobus-pinctrl" "amlogic,meson-gxl-periphs-pinctrl" -- GitLab From 17b26041363f7d4589e23bcad21f62dbfcf50bf8 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 29 Apr 2018 01:46:23 +0300 Subject: [PATCH 0534/1291] spi: meson-spicc: Fix error handling in meson_spicc_probe() [ Upstream commit ded5fa4e8bac25612caab8f0822691308a28a552 ] If devm_spi_register_master() fails in meson_spicc_probe(), spicc->core is left undisabled. The patch fixes that. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Reviewed-by: Neil Armstrong Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-meson-spicc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index 7f8429635502..a5b0df7e6131 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -574,10 +574,15 @@ static int meson_spicc_probe(struct platform_device *pdev) master->max_speed_hz = rate >> 2; ret = devm_spi_register_master(&pdev->dev, master); - if (!ret) - return 0; + if (ret) { + dev_err(&pdev->dev, "spi master registration failed\n"); + goto out_clk; + } - dev_err(&pdev->dev, "spi master registration failed\n"); + return 0; + +out_clk: + clk_disable_unprepare(spicc->core); out_master: spi_master_put(master); -- GitLab From c3b540c06954615a58c74fd4658d715fdf8955a4 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Tue, 1 May 2018 19:55:59 +0100 Subject: [PATCH 0535/1291] net: hns3: Fixes the out of bounds access in hclge_map_tqp [ Upstream commit 38e62046d4c95272e2fb001d2d72baf48fa090e9 ] This patch fixes the handling of the check when number of vports are detected to be more than available TPQs. Current handling causes an out of bounds access in hclge_map_tqp(). Fixes: 7df7dad633e2 ("net: hns3: Refactor the mapping of tqp to vport") Signed-off-by: Huazhong Tan Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ff7a70ffafc6..c133491ad9fa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1272,8 +1272,11 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) /* We need to alloc a vport for main NIC of PF */ num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; - if (hdev->num_tqps < num_vport) - num_vport = hdev->num_tqps; + if (hdev->num_tqps < num_vport) { + dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)", + hdev->num_tqps, num_vport); + return -EINVAL; + } /* Alloc the same number of TQPs for every vport */ tqp_per_vport = hdev->num_tqps / num_vport; -- GitLab From 232703c9091f18b57f371fe6bbe0716ef5c09df5 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Sat, 28 Apr 2018 10:21:10 +0000 Subject: [PATCH 0536/1291] dt-bindings: net: meson-dwmac: new compatible name for AXG SoC [ Upstream commit 7e5d05e18ba1ed491c6f836edee7f0b90f3167bc ] We need to introduce a new compatible name for the Meson-AXG SoC in order to support the RMII 100M ethernet PHY, since the PRG_ETH0 register of the dwmac glue layer is changed from previous old SoC. Signed-off-by: Yixun Lan Reviewed-by: Rob Herring Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/meson-dwmac.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt index 354dd9896bb5..910187ebf1ce 100644 --- a/Documentation/devicetree/bindings/net/meson-dwmac.txt +++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt @@ -10,6 +10,7 @@ Required properties on all platforms: - "amlogic,meson6-dwmac" - "amlogic,meson8b-dwmac" - "amlogic,meson-gxbb-dwmac" + - "amlogic,meson-axg-dwmac" Additionally "snps,dwmac" and any applicable more detailed version number described in net/stmmac.txt should be used. -- GitLab From 4531135811a5bf90011c99fe850a45a16a584f8b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 14 Jan 2018 22:07:10 +0100 Subject: [PATCH 0537/1291] backlight: pwm_bl: Don't use GPIOF_* with gpiod_get_direction [ Upstream commit bb084c0f61d659f0e6d371b096e0e57998f191d6 ] The documentation was wrong, gpiod_get_direction() returns 0/1 instead of the GPIOF_* flags. The docs were fixed with commit 94fc73094abe47 ("gpio: correct docs about return value of gpiod_get_direction"). Now, fix this user (until a better, system-wide solution is in place). Signed-off-by: Wolfram Sang Acked-by: Daniel Thompson Reviewed-by: Simon Horman Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/backlight/pwm_bl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 1c2289ddd555..0fa7d2bd0e48 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -301,14 +301,14 @@ static int pwm_backlight_probe(struct platform_device *pdev) /* * If the GPIO is not known to be already configured as output, that - * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL, - * change the direction to output and set the GPIO as active. + * is, if gpiod_get_direction returns either 1 or -EINVAL, change the + * direction to output and set the GPIO as active. * Do not force the GPIO to active when it was already output as it * could cause backlight flickering or we would enable the backlight too * early. Leave the decision of the initial backlight state for later. */ if (pb->enable_gpio && - gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT) + gpiod_get_direction(pb->enable_gpio) != 0) gpiod_direction_output(pb->enable_gpio, 1); pb->power_supply = devm_regulator_get(&pdev->dev, "power"); -- GitLab From da2b62c740def7d1e9d7ce4506e8b1b7a2514e89 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 23 Apr 2018 21:16:35 +0200 Subject: [PATCH 0538/1291] stop_machine: Use raw spinlocks [ Upstream commit de5b55c1d4e30740009864eb35ce4ed856aac01d ] Use raw-locks in stop_machine() to allow locking in irq-off and preempt-disabled regions on -RT. This also documents the possible locking context in general. [bigeasy: update patch description.] Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20180423191635.6014-1-bigeasy@linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/stop_machine.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2f6fa95de2d8..1ff523dae6e2 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -37,7 +37,7 @@ struct cpu_stop_done { struct cpu_stopper { struct task_struct *thread; - spinlock_t lock; + raw_spinlock_t lock; bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ @@ -81,13 +81,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) unsigned long flags; bool enabled; - spin_lock_irqsave(&stopper->lock, flags); + raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) __cpu_stop_queue_work(stopper, work, &wakeq); else if (work->done) cpu_stop_signal_done(work->done); - spin_unlock_irqrestore(&stopper->lock, flags); + raw_spin_unlock_irqrestore(&stopper->lock, flags); wake_up_q(&wakeq); @@ -237,8 +237,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, DEFINE_WAKE_Q(wakeq); int err; retry: - spin_lock_irq(&stopper1->lock); - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock_irq(&stopper1->lock); + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); err = -ENOENT; if (!stopper1->enabled || !stopper2->enabled) @@ -261,8 +261,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); unlock: - spin_unlock(&stopper2->lock); - spin_unlock_irq(&stopper1->lock); + raw_spin_unlock(&stopper2->lock); + raw_spin_unlock_irq(&stopper1->lock); if (unlikely(err == -EDEADLK)) { while (stop_cpus_in_progress) @@ -461,9 +461,9 @@ static int cpu_stop_should_run(unsigned int cpu) unsigned long flags; int run; - spin_lock_irqsave(&stopper->lock, flags); + raw_spin_lock_irqsave(&stopper->lock, flags); run = !list_empty(&stopper->works); - spin_unlock_irqrestore(&stopper->lock, flags); + raw_spin_unlock_irqrestore(&stopper->lock, flags); return run; } @@ -474,13 +474,13 @@ static void cpu_stopper_thread(unsigned int cpu) repeat: work = NULL; - spin_lock_irq(&stopper->lock); + raw_spin_lock_irq(&stopper->lock); if (!list_empty(&stopper->works)) { work = list_first_entry(&stopper->works, struct cpu_stop_work, list); list_del_init(&work->list); } - spin_unlock_irq(&stopper->lock); + raw_spin_unlock_irq(&stopper->lock); if (work) { cpu_stop_fn_t fn = work->fn; @@ -554,7 +554,7 @@ static int __init cpu_stop_init(void) for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - spin_lock_init(&stopper->lock); + raw_spin_lock_init(&stopper->lock); INIT_LIST_HEAD(&stopper->works); } -- GitLab From c06f5a018f710ff24ef7c1b922d2b6704c35dd8c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 23 Apr 2018 18:10:23 +0200 Subject: [PATCH 0539/1291] delayacct: Use raw_spinlocks [ Upstream commit 02acc80d19edb0d5684c997b2004ad19f9f5236e ] try_to_wake_up() might invoke delayacct_blkio_end() while holding the pi_lock (which is a raw_spinlock_t). delayacct_blkio_end() acquires task_delay_info.lock which is a spinlock_t. This causes a might sleep splat on -RT where non raw spinlocks are converted to 'sleeping' spinlocks. task_delay_info.lock is only held for a short amount of time so it's not a problem latency wise to make convert it to a raw spinlock. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Balbir Singh Link: https://lkml.kernel.org/r/20180423161024.6710-1-bigeasy@linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/delayacct.h | 2 +- kernel/delayacct.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 41ee6dea7531..31c865d1842e 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -29,7 +29,7 @@ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { - spinlock_t lock; + raw_spinlock_t lock; unsigned int flags; /* Private per-task flags */ /* For each stat XXX, add following, aligned appropriately diff --git a/kernel/delayacct.c b/kernel/delayacct.c index e2764d767f18..ca8ac2824f0b 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -44,23 +44,24 @@ void __delayacct_tsk_init(struct task_struct *tsk) { tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); if (tsk->delays) - spin_lock_init(&tsk->delays->lock); + raw_spin_lock_init(&tsk->delays->lock); } /* * Finish delay accounting for a statistic using its timestamps (@start), * accumalator (@total) and @count */ -static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, + u32 *count) { s64 ns = ktime_get_ns() - *start; unsigned long flags; if (ns > 0) { - spin_lock_irqsave(lock, flags); + raw_spin_lock_irqsave(lock, flags); *total += ns; (*count)++; - spin_unlock_irqrestore(lock, flags); + raw_spin_unlock_irqrestore(lock, flags); } } @@ -127,7 +128,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ - spin_lock_irqsave(&tsk->delays->lock, flags); + raw_spin_lock_irqsave(&tsk->delays->lock, flags); tmp = d->blkio_delay_total + tsk->delays->blkio_delay; d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; @@ -137,7 +138,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; - spin_unlock_irqrestore(&tsk->delays->lock, flags); + raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; } @@ -147,10 +148,10 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) __u64 ret; unsigned long flags; - spin_lock_irqsave(&tsk->delays->lock, flags); + raw_spin_lock_irqsave(&tsk->delays->lock, flags); ret = nsec_to_clock_t(tsk->delays->blkio_delay + tsk->delays->swapin_delay); - spin_unlock_irqrestore(&tsk->delays->lock, flags); + raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return ret; } -- GitLab From 9746d3696de90457a0e7880d4e8ed3d8fe52f8bc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 22:28:27 +0300 Subject: [PATCH 0540/1291] memory: tegra: Do not handle spurious interrupts [ Upstream commit bf3fbdfbec947cdd04b2f2c4bce11534c8786eee ] The ISR reads interrupts-enable mask, but doesn't utilize it. Apply the mask to the interrupt status and don't handle interrupts that MC driver haven't asked for. Kernel would disable spurious MC IRQ and report the error. This would happen only in a case of a very severe bug. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/memory/tegra/mc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index a4803ac192bb..d2005b995821 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -252,8 +252,11 @@ static irqreturn_t tegra_mc_irq(int irq, void *data) unsigned int bit; /* mask all interrupts to avoid flooding */ - status = mc_readl(mc, MC_INTSTATUS); mask = mc_readl(mc, MC_INTMASK); + status = mc_readl(mc, MC_INTSTATUS) & mask; + + if (!status) + return IRQ_NONE; for_each_set_bit(bit, &status, 32) { const char *error = status_names[bit] ?: "unknown"; -- GitLab From 503f22cf7b2a5fcd4dfb0041eca03028a6284fab Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 22:28:29 +0300 Subject: [PATCH 0541/1291] memory: tegra: Apply interrupts mask per SoC [ Upstream commit 1c74d5c0de0c2cc29fef97a19251da2ad6f579bd ] Currently we are enabling handling of interrupts specific to Tegra124+ which happen to overlap with previous generations. Let's specify interrupts mask per SoC generation for consistency and in a preparation of squashing of Tegra20 driver into the common one that will enable handling of GART faults which may be undesirable by newer generations. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/memory/tegra/mc.c | 21 +++------------------ drivers/memory/tegra/mc.h | 9 +++++++++ drivers/memory/tegra/tegra114.c | 2 ++ drivers/memory/tegra/tegra124.c | 6 ++++++ drivers/memory/tegra/tegra210.c | 3 +++ drivers/memory/tegra/tegra30.c | 2 ++ include/soc/tegra/mc.h | 2 ++ 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index d2005b995821..1d49a8dd4a37 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -20,14 +20,6 @@ #include "mc.h" #define MC_INTSTATUS 0x000 -#define MC_INT_DECERR_MTS (1 << 16) -#define MC_INT_SECERR_SEC (1 << 13) -#define MC_INT_DECERR_VPR (1 << 12) -#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11) -#define MC_INT_INVALID_SMMU_PAGE (1 << 10) -#define MC_INT_ARBITRATION_EMEM (1 << 9) -#define MC_INT_SECURITY_VIOLATION (1 << 8) -#define MC_INT_DECERR_EMEM (1 << 6) #define MC_INTMASK 0x004 @@ -248,13 +240,11 @@ static const char *const error_names[8] = { static irqreturn_t tegra_mc_irq(int irq, void *data) { struct tegra_mc *mc = data; - unsigned long status, mask; + unsigned long status; unsigned int bit; /* mask all interrupts to avoid flooding */ - mask = mc_readl(mc, MC_INTMASK); - status = mc_readl(mc, MC_INTSTATUS) & mask; - + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; if (!status) return IRQ_NONE; @@ -349,7 +339,6 @@ static int tegra_mc_probe(struct platform_device *pdev) const struct of_device_id *match; struct resource *res; struct tegra_mc *mc; - u32 value; int err; match = of_match_node(tegra_mc_of_match, pdev->dev.of_node); @@ -417,11 +406,7 @@ static int tegra_mc_probe(struct platform_device *pdev) WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n"); - value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | - MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | - MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM; - - mc_writel(mc, value, MC_INTMASK); + mc_writel(mc, mc->soc->intmask, MC_INTMASK); return 0; } diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h index ddb16676c3af..24e020b4609b 100644 --- a/drivers/memory/tegra/mc.h +++ b/drivers/memory/tegra/mc.h @@ -14,6 +14,15 @@ #include +#define MC_INT_DECERR_MTS (1 << 16) +#define MC_INT_SECERR_SEC (1 << 13) +#define MC_INT_DECERR_VPR (1 << 12) +#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11) +#define MC_INT_INVALID_SMMU_PAGE (1 << 10) +#define MC_INT_ARBITRATION_EMEM (1 << 9) +#define MC_INT_SECURITY_VIOLATION (1 << 8) +#define MC_INT_DECERR_EMEM (1 << 6) + static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset) { return readl(mc->regs + offset); diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index ba8fff3d66a6..6d2a5a849d92 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -930,4 +930,6 @@ const struct tegra_mc_soc tegra114_mc_soc = { .atom_size = 32, .client_id_mask = 0x7f, .smmu = &tegra114_smmu_soc, + .intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION | + MC_INT_DECERR_EMEM, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index 5a58e440f4a7..9f68a56f2727 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -1020,6 +1020,9 @@ const struct tegra_mc_soc tegra124_mc_soc = { .smmu = &tegra124_smmu_soc, .emem_regs = tegra124_mc_emem_regs, .num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs), + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; #endif /* CONFIG_ARCH_TEGRA_124_SOC */ @@ -1042,5 +1045,8 @@ const struct tegra_mc_soc tegra132_mc_soc = { .atom_size = 32, .client_id_mask = 0x7f, .smmu = &tegra132_smmu_soc, + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; #endif /* CONFIG_ARCH_TEGRA_132_SOC */ diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c index 5e144abe4c18..47c78a6d8f00 100644 --- a/drivers/memory/tegra/tegra210.c +++ b/drivers/memory/tegra/tegra210.c @@ -1077,4 +1077,7 @@ const struct tegra_mc_soc tegra210_mc_soc = { .atom_size = 64, .client_id_mask = 0xff, .smmu = &tegra210_smmu_soc, + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index b44737840e70..d0689428ea1a 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -952,4 +952,6 @@ const struct tegra_mc_soc tegra30_mc_soc = { .atom_size = 16, .client_id_mask = 0x7f, .smmu = &tegra30_smmu_soc, + .intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION | + MC_INT_DECERR_EMEM, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 44202ff897fd..f759e0918037 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -99,6 +99,8 @@ struct tegra_mc_soc { u8 client_id_mask; const struct tegra_smmu_soc *smmu; + + u32 intmask; }; struct tegra_mc { -- GitLab From 2ee4fbcd27f97006748656c791fcf59dec0ebf3f Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Thu, 26 Apr 2018 14:59:19 -0600 Subject: [PATCH 0542/1291] nvme: lightnvm: add granby support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ea48e877994f086af481427bac110aa63686c3ce ] Add a new lightnvm quirk to identify CNEX’s Granby controller. Signed-off-by: Wei Xu Reviewed-by: Javier González Reviewed-by: Matias Bjørling Signed-off-by: Keith Busch Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6a76e3974240..f5643d107cc6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2565,6 +2565,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -- GitLab From c7ab132d7e3de0a9020bf5b3e91fc6fe0736031d Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 16 Apr 2018 11:39:57 -0300 Subject: [PATCH 0543/1291] arm64: defconfig: Enable Rockchip io-domain driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7c8b77f81552c2b0e5d9c560da70bc4149ce66a5 ] Heiko Stübner justified pretty well the change in commit e330eb86ba0b ("ARM: multi_v7_defconfig: enable Rockchip io-domain driver"). This change is also needed for arm64 rockchip boards, so, do the same for arm64. The io-domain driver is necessary to notify the soc about voltages changes happening on supplying regulators. Probably the most important user right now is the mmc tuning code, where the soc needs to get notified when the voltage is dropped to the 1.8V point. As this option is necessary to successfully tune UHS cards etc, it should get built in. Otherwise, tuning will fail with, dwmmc_rockchip fe320000.dwmmc: All phases bad! mmc0: tuning execution failed: -5 Signed-off-by: Enric Balletbo i Serra Acked-by: Robin Murphy Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 34480e9af2e7..b05796578e7a 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -302,6 +302,8 @@ CONFIG_GPIO_XGENE_SB=y CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCA953X_IRQ=y CONFIG_GPIO_MAX77620=y +CONFIG_POWER_AVS=y +CONFIG_ROCKCHIP_IODOMAIN=y CONFIG_POWER_RESET_MSM=y CONFIG_POWER_RESET_XGENE=y CONFIG_POWER_RESET_SYSCON=y -- GitLab From 75d1087174d27e2ac835f04ba782e39b7e54096d Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 10 Apr 2018 10:49:51 -0700 Subject: [PATCH 0544/1291] igb: Fix queue selection on MAC filters on i210 [ Upstream commit 4dc93fcf0b95dc3fda4db917effae31fbb8ad2a8 ] On the RAH registers there are semantic differences on the meaning of the "queue" parameter for traffic steering depending on the controller model: there is the 82575 meaning, which "queue" means a RX Hardware Queue, and the i350 meaning, where it is a reception pool. The previous behaviour was having no effect for i210 based controllers because the QSEL bit of the RAH register wasn't being set. This patch separates the condition in discrete cases, so the different handling is clearer. Fixes: 83c21335c876 ("igb: improve MAC filter handling") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6ca580cdfd84..1c027f9d9af5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8376,12 +8376,17 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) if (is_valid_ether_addr(addr)) rar_high |= E1000_RAH_AV; - if (hw->mac.type == e1000_82575) + switch (hw->mac.type) { + case e1000_82575: + case e1000_i210: rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; - else + break; + default: rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; + break; + } } wr32(E1000_RAL(index), rar_low); -- GitLab From 18a48a7a4418381d03492de3dba8cfee5ca5d14f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:14:57 +0200 Subject: [PATCH 0545/1291] drm/gma500: fix psb_intel_lvds_mode_valid()'s return type [ Upstream commit 2ea009095c6e7396915a1d0dd480c41f02985f79 ] The method struct drm_connector_helper_funcs::mode_valid is defined as returning an 'enum drm_mode_status' but the driver implementation for this method, psb_intel_lvds_mode_valid(), uses an 'int' for it. Fix this by using 'enum drm_mode_status' for psb_intel_lvds_mode_valid(). Signed-off-by: Luc Van Oostenryck Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180424131458.2060-1-luc.vanoostenryck@gmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_intel_drv.h | 2 +- drivers/gpu/drm/gma500/psb_intel_lvds.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h index e8e4ea14b12b..e05e5399af2d 100644 --- a/drivers/gpu/drm/gma500/psb_intel_drv.h +++ b/drivers/gpu/drm/gma500/psb_intel_drv.h @@ -255,7 +255,7 @@ extern int intelfb_remove(struct drm_device *dev, extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); -extern int psb_intel_lvds_mode_valid(struct drm_connector *connector, +extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); extern int psb_intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index be3eefec5152..8baf6325c6e4 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector) } } -int psb_intel_lvds_mode_valid(struct drm_connector *connector, +enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_psb_private *dev_priv = connector->dev->dev_private; -- GitLab From e660508795d6cc8e509be68596164d6574111149 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 24 Apr 2018 03:56:37 +0100 Subject: [PATCH 0546/1291] ipconfig: Correctly initialise ic_nameservers [ Upstream commit 300eec7c0a2495f771709c7642aa15f7cc148b83 ] ic_nameservers, which stores the list of name servers discovered by ipconfig, is initialised (i.e. has all of its elements set to NONE, or 0xffffffff) by ic_nameservers_predef() in the following scenarios: - before the "ip=" and "nfsaddrs=" kernel command line parameters are parsed (in ip_auto_config_setup()); - before autoconfiguring via DHCP or BOOTP (in ic_bootp_init()), in order to clear any values that may have been set after parsing "ip=" or "nfsaddrs=" and are no longer needed. This means that ic_nameservers_predef() is not called when neither "ip=" nor "nfsaddrs=" is specified on the kernel command line. In this scenario, every element in ic_nameservers remains set to 0x00000000, which is indistinguishable from ANY and causes pnp_seq_show() to write the following (bogus) information to /proc/net/pnp: #MANUAL nameserver 0.0.0.0 nameserver 0.0.0.0 nameserver 0.0.0.0 This is potentially problematic for systems that blindly link /etc/resolv.conf to /proc/net/pnp. Ensure that ic_nameservers is also initialised when neither "ip=" nor "nfsaddrs=" are specified by calling ic_nameservers_predef() in ip_auto_config(), but only when ip_auto_config_setup() was not called earlier. This causes the following to be written to /proc/net/pnp, and is consistent with what gets written when ipconfig is configured manually but no name servers are specified on the kernel command line: #MANUAL Signed-off-by: Chris Novakovic Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipconfig.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index abdebca848c9..f0782c91514c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -781,6 +781,11 @@ static void __init ic_bootp_init_ext(u8 *e) */ static inline void __init ic_bootp_init(void) { + /* Re-initialise all name servers to NONE, in case any were set via the + * "ip=" or "nfsaddrs=" kernel command line parameters: any IP addresses + * specified there will already have been decoded but are no longer + * needed + */ ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); @@ -1402,6 +1407,13 @@ static int __init ip_auto_config(void) int err; unsigned int i; + /* Initialise all name servers to NONE (but only if the "ip=" or + * "nfsaddrs=" kernel command line parameters weren't decoded, otherwise + * we'll overwrite the IP addresses specified there) + */ + if (ic_set_manually == 0) + ic_nameservers_predef(); + #ifdef CONFIG_PROC_FS proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ @@ -1622,6 +1634,7 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + /* Initialise all name servers to NONE */ ic_nameservers_predef(); /* Parse string for static IP assignment. */ -- GitLab From 0be8aa812cef251b4208bb51fadf3510ded74f33 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Wed, 11 Apr 2018 12:13:32 +0530 Subject: [PATCH 0547/1291] rsi: Fix 'invalid vdd' warning in mmc [ Upstream commit 78e450719c702784e42af6da912d3692fd3da0cb ] While performing cleanup, driver is messing with card->ocr value by not masking rocr against ocr_avail. Below panic is observed with some of the SDIO host controllers due to this. Issue is resolved by reverting incorrect modifications to vdd. [ 927.423821] mmc1: Invalid vdd 0x1f [ 927.423925] Modules linked in: rsi_sdio(+) cmac bnep arc4 rsi_91x mac80211 cfg80211 btrsi rfcomm bluetooth ecdh_generic [ 927.424073] CPU: 0 PID: 1624 Comm: insmod Tainted: G W 4.15.0-1000-caracalla #1 [ 927.424075] Hardware name: Dell Inc. Edge Gateway 3003/ , BIOS 01.00.06 01/22/2018 [ 927.424082] RIP: 0010:sdhci_set_power_noreg+0xdd/0x190[sdhci] [ 927.424085] RSP: 0018:ffffac3fc064b930 EFLAGS: 00010282 [ 927.424107] Call Trace: [ 927.424118] sdhci_set_power+0x5a/0x60 [sdhci] [ 927.424125] sdhci_set_ios+0x360/0x3b0 [sdhci] [ 927.424133] mmc_set_initial_state+0x92/0x120 [ 927.424137] mmc_power_up.part.34+0x33/0x1d0 [ 927.424141] mmc_power_up+0x17/0x20 [ 927.424147] mmc_sdio_runtime_resume+0x2d/0x50 [ 927.424151] mmc_runtime_resume+0x17/0x20 [ 927.424156] __rpm_callback+0xc4/0x200 [ 927.424161] ? idr_alloc_cyclic+0x57/0xd0 [ 927.424165] ? mmc_runtime_suspend+0x20/0x20 [ 927.424169] rpm_callback+0x24/0x80 [ 927.424172] ? mmc_runtime_suspend+0x20/0x20 [ 927.424176] rpm_resume+0x4b3/0x6c0 [ 927.424181] __pm_runtime_resume+0x4e/0x80 [ 927.424188] driver_probe_device+0x41/0x490 [ 927.424192] __driver_attach+0xdf/0xf0 [ 927.424196] ? driver_probe_device+0x490/0x490 [ 927.424201] bus_for_each_dev+0x6c/0xc0 [ 927.424205] driver_attach+0x1e/0x20 [ 927.424209] bus_add_driver+0x1f4/0x270 [ 927.424217] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.424221] driver_register+0x60/0xe0 [ 927.424227] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.424231] sdio_register_driver+0x20/0x30 [ 927.424237] rsi_module_init+0x16/0x40 [rsi_sdio] Signed-off-by: Siva Rebbagondla Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 370161ca2a1c..2545f8f8f3fb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -161,7 +161,6 @@ static void rsi_reset_card(struct sdio_func *pfunction) int err; struct mmc_card *card = pfunction->card; struct mmc_host *host = card->host; - s32 bit = (fls(host->ocr_avail) - 1); u8 cmd52_resp; u32 clock, resp, i; u16 rca; @@ -181,7 +180,6 @@ static void rsi_reset_card(struct sdio_func *pfunction) msleep(20); /* Initialize the SDIO card */ - host->ios.vdd = bit; host->ios.chip_select = MMC_CS_DONTCARE; host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; host->ios.power_mode = MMC_POWER_UP; -- GitLab From e7cb8f11f6e2341a167c1b23c58716171f3fcfb9 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Wed, 11 Apr 2018 12:13:31 +0530 Subject: [PATCH 0548/1291] rsi: fix nommu_map_sg overflow kernel panic [ Upstream commit f700546682a62a87a9615121a37ee7452dab4b76 ] Following overflow kernel panic is observed on some platforms while loading the driver. It is fixed if dynamically allocated memory is passed to SDIO instead of static one [ 927.513963] nommu_map_sg: overflow 17d54064ba7c+20 of device mask ffffffff [ 927.517712] Modules linked in: rsi_sdio(+) cmac bnep arc4 rsi_91x mac80211 cfg80211 btrsi rfcomm bluetooth ecdh_generic snd_soc_sst_bytcr_rt5660 [ 927.517861] CPU: 0 PID: 1624 Comm: insmod Tainted: G W 4.15.0-1000 #1 [ 927.517870] RIP: 0010:sdhci_send_command+0x5f0/0xa90 [sdhci] [ 927.517873] RSP: 0000:ffffac3fc064b6d8 EFLAGS: 00010086 [ 927.517895] Call Trace: [ 927.517908] ? __schedule+0x3cd/0x890 [ 927.517915] ? mod_timer+0x17b/0x3c0 [ 927.517922] sdhci_request+0x7c/0xf0 [sdhci] [ 927.517928] __mmc_start_request+0x5a/0x170 [ 927.517932] mmc_start_request+0x74/0x90 [ 927.517936] mmc_wait_for_req+0x87/0xe0 [ 927.517940] mmc_io_rw_extended+0x2fd/0x330 [ 927.517946] ? mmc_wait_data_done+0x30/0x30 [ 927.517951] sdio_io_rw_ext_helper+0x160/0x210 [ 927.517956] sdio_writesb+0x1d/0x20 [ 927.517966] rsi_sdio_write_register_multiple+0x68/0x110 [rsi_sdio] [ 927.517976] rsi_hal_device_init+0x357/0x910 [rsi_91x] [ 927.517983] ? rsi_hal_device_init+0x357/0x910 [rsi_91x] [ 927.517990] rsi_probe+0x2c6/0x450 [rsi_sdio] [ 927.517995] sdio_bus_probe+0xfc/0x110 [ 927.518000] driver_probe_device+0x2b3/0x490 [ 927.518005] __driver_attach+0xdf/0xf0 [ 927.518008] ? driver_probe_device+0x490/0x490 [ 927.518014] bus_for_each_dev+0x6c/0xc0 [ 927.518018] driver_attach+0x1e/0x20 [ 927.518021] bus_add_driver+0x1f4/0x270 [ 927.518028] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.518031] driver_register+0x60/0xe0 [ 927.518038] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.518041] sdio_register_driver+0x20/0x30 [ 927.518047] rsi_module_init+0x16/0x40 [rsi_sdio] Signed-off-by: Siva Rebbagondla Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_hal.c | 35 +++++++++++++++---------- drivers/net/wireless/rsi/rsi_91x_sdio.c | 21 ++++++++++----- drivers/net/wireless/rsi/rsi_sdio.h | 2 +- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 070dfd68bb83..120b0ff545c1 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -557,28 +557,32 @@ static int bl_write_header(struct rsi_hw *adapter, u8 *flash_content, u32 content_size) { struct rsi_host_intf_ops *hif_ops = adapter->host_intf_ops; - struct bl_header bl_hdr; + struct bl_header *bl_hdr; u32 write_addr, write_len; int status; - bl_hdr.flags = 0; - bl_hdr.image_no = cpu_to_le32(adapter->priv->coex_mode); - bl_hdr.check_sum = cpu_to_le32( - *(u32 *)&flash_content[CHECK_SUM_OFFSET]); - bl_hdr.flash_start_address = cpu_to_le32( - *(u32 *)&flash_content[ADDR_OFFSET]); - bl_hdr.flash_len = cpu_to_le32(*(u32 *)&flash_content[LEN_OFFSET]); + bl_hdr = kzalloc(sizeof(*bl_hdr), GFP_KERNEL); + if (!bl_hdr) + return -ENOMEM; + + bl_hdr->flags = 0; + bl_hdr->image_no = cpu_to_le32(adapter->priv->coex_mode); + bl_hdr->check_sum = + cpu_to_le32(*(u32 *)&flash_content[CHECK_SUM_OFFSET]); + bl_hdr->flash_start_address = + cpu_to_le32(*(u32 *)&flash_content[ADDR_OFFSET]); + bl_hdr->flash_len = cpu_to_le32(*(u32 *)&flash_content[LEN_OFFSET]); write_len = sizeof(struct bl_header); if (adapter->rsi_host_intf == RSI_HOST_INTF_USB) { write_addr = PING_BUFFER_ADDRESS; status = hif_ops->write_reg_multiple(adapter, write_addr, - (u8 *)&bl_hdr, write_len); + (u8 *)bl_hdr, write_len); if (status < 0) { rsi_dbg(ERR_ZONE, "%s: Failed to load Version/CRC structure\n", __func__); - return status; + goto fail; } } else { write_addr = PING_BUFFER_ADDRESS >> 16; @@ -587,20 +591,23 @@ static int bl_write_header(struct rsi_hw *adapter, u8 *flash_content, rsi_dbg(ERR_ZONE, "%s: Unable to set ms word to common reg\n", __func__); - return status; + goto fail; } write_addr = RSI_SD_REQUEST_MASTER | (PING_BUFFER_ADDRESS & 0xFFFF); status = hif_ops->write_reg_multiple(adapter, write_addr, - (u8 *)&bl_hdr, write_len); + (u8 *)bl_hdr, write_len); if (status < 0) { rsi_dbg(ERR_ZONE, "%s: Failed to load Version/CRC structure\n", __func__); - return status; + goto fail; } } - return 0; + status = 0; +fail: + kfree(bl_hdr); + return status; } static u32 read_flash_capacity(struct rsi_hw *adapter) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 2545f8f8f3fb..0362967874aa 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -968,17 +968,21 @@ static void ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data, /*This function resets and re-initializes the chip.*/ static void rsi_reset_chip(struct rsi_hw *adapter) { - __le32 data; + u8 *data; u8 sdio_interrupt_status = 0; u8 request = 1; int ret; + data = kzalloc(sizeof(u32), GFP_KERNEL); + if (!data) + return; + rsi_dbg(INFO_ZONE, "Writing disable to wakeup register\n"); ret = rsi_sdio_write_register(adapter, 0, SDIO_WAKEUP_REG, &request); if (ret < 0) { rsi_dbg(ERR_ZONE, "%s: Failed to write SDIO wakeup register\n", __func__); - return; + goto err; } msleep(20); ret = rsi_sdio_read_register(adapter, RSI_FN1_INT_REGISTER, @@ -986,7 +990,7 @@ static void rsi_reset_chip(struct rsi_hw *adapter) if (ret < 0) { rsi_dbg(ERR_ZONE, "%s: Failed to Read Intr Status Register\n", __func__); - return; + goto err; } rsi_dbg(INFO_ZONE, "%s: Intr Status Register value = %d\n", __func__, sdio_interrupt_status); @@ -996,17 +1000,17 @@ static void rsi_reset_chip(struct rsi_hw *adapter) rsi_dbg(ERR_ZONE, "%s: Unable to set ms word to common reg\n", __func__); - return; + goto err; } - data = TA_HOLD_THREAD_VALUE; + put_unaligned_le32(TA_HOLD_THREAD_VALUE, data); if (rsi_sdio_write_register_multiple(adapter, TA_HOLD_THREAD_REG | RSI_SD_REQUEST_MASTER, - (u8 *)&data, 4)) { + data, 4)) { rsi_dbg(ERR_ZONE, "%s: Unable to hold Thread-Arch processor threads\n", __func__); - return; + goto err; } /* This msleep will ensure Thread-Arch processor to go to hold @@ -1027,6 +1031,9 @@ static void rsi_reset_chip(struct rsi_hw *adapter) * read write operations to complete for chip reset. */ msleep(500); +err: + kfree(data); + return; } /** diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h index 903392039200..6788fbbdd166 100644 --- a/drivers/net/wireless/rsi/rsi_sdio.h +++ b/drivers/net/wireless/rsi/rsi_sdio.h @@ -85,7 +85,7 @@ enum sdio_interrupt_type { #define TA_SOFT_RST_CLR 0 #define TA_SOFT_RST_SET BIT(0) #define TA_PC_ZERO 0 -#define TA_HOLD_THREAD_VALUE cpu_to_le32(0xF) +#define TA_HOLD_THREAD_VALUE 0xF #define TA_RELEASE_THREAD_VALUE cpu_to_le32(0xF) #define TA_BASE_ADDR 0x2200 #define MISC_CFG_BASE_ADDR 0x4105 -- GitLab From 06d6d1ad241d9df531e1d8af4be6c468cb830d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= Date: Mon, 9 Apr 2018 10:00:06 +0200 Subject: [PATCH 0549/1291] audit: allow not equal op for audit by executable [ Upstream commit 23bcc480dac204c7dbdf49d96b2c918ed98223c2 ] Current implementation of auditing by executable name only implements the 'equal' operator. This patch extends it to also support the 'not equal' operator. See: https://github.com/linux-audit/audit-kernel/issues/53 Signed-off-by: Ondrej Mosnacek Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 0b0aa5854dac..8dd4063647c2 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -407,7 +407,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) return -EINVAL; break; case AUDIT_EXE: - if (f->op != Audit_equal) + if (f->op != Audit_not_equal && f->op != Audit_equal) return -EINVAL; if (entry->rule.listnr != AUDIT_FILTER_EXIT) return -EINVAL; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ecc23e25c9eb..677053a2fb57 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -471,6 +471,8 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_EXE: result = audit_exe_compare(tsk, rule->exe); + if (f->op == Audit_not_equal) + result = !result; break; case AUDIT_UID: result = audit_uid_comparator(cred->uid, f->op, f->uid); -- GitLab From fea5a0d8781644daaee936a84f14dae5e7fc0795 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 31 Mar 2018 22:09:37 +0200 Subject: [PATCH 0550/1291] staging: vchiq_core: Fix missing semaphore release in error case [ Upstream commit 8113b89fc615cfb531df0334fb3a091cf6a45ce0 ] The bail out branch in case of a invalid tx_pos missed a semaphore release. Dan Carpenter found this with a static checker. Fixes: d1eab9dec610 ("staging: vchiq_core: Bail out in case of invalid tx_pos") Reported-by: Dan Carpenter Signed-off-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 486be990d7fc..a457034818c3 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -601,6 +601,7 @@ reserve_space(VCHIQ_STATE_T *state, size_t space, int is_blocking) } if (tx_pos == (state->slot_queue_available * VCHIQ_SLOT_SIZE)) { + up(&state->slot_available_event); pr_warn("%s: invalid tx_pos: %d\n", __func__, tx_pos); return NULL; } -- GitLab From e906355a7f9665c8ec07739f8561d0c783ee5dad Mon Sep 17 00:00:00 2001 From: James Simmons Date: Mon, 16 Apr 2018 00:15:10 -0400 Subject: [PATCH 0551/1291] staging: lustre: llite: correct removexattr detection [ Upstream commit 1b60f6dfa38403ff7c4d0b4b7ecdb810f9789a2a ] In ll_xattr_set_common() detect the removexattr() case correctly by testing for a NULL value as well as XATTR_REPLACE. Signed-off-by: John L. Hammond Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10787 Reviewed-on: https://review.whamcloud.com/ Reviewed-by: Dmitry Eremin Reviewed-by: James Simmons Signed-off-by: James Simmons Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/llite/xattr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index 0be55623bac4..364d697b2690 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -93,7 +93,11 @@ ll_xattr_set_common(const struct xattr_handler *handler, __u64 valid; int rc; - if (flags == XATTR_REPLACE) { + /* When setxattr() is called with a size of 0 the value is + * unconditionally replaced by "". When removexattr() is + * called we get a NULL value and XATTR_REPLACE for flags. + */ + if (!value && flags == XATTR_REPLACE) { ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); valid = OBD_MD_FLXATTRRM; } else { -- GitLab From 41e1f1feee0634c0acb3727980607c7784c7d494 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 29 Mar 2018 15:26:48 +1100 Subject: [PATCH 0552/1291] staging: lustre: ldlm: free resource when ldlm_lock_create() fails. [ Upstream commit d8caf662b4aeeb2ac83ac0b22e40db88e9360c77 ] ldlm_lock_create() gets a resource, but don't put it on all failure paths. It should. Signed-off-by: NeilBrown Reviewed-by: James Simmons Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c index b5d84f3f6071..11e01c48f51a 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c @@ -1571,8 +1571,10 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, return ERR_CAST(res); lock = ldlm_lock_new(res); - if (!lock) + if (!lock) { + ldlm_resource_putref(res); return ERR_PTR(-ENOMEM); + } lock->l_req_mode = mode; lock->l_ast_data = data; @@ -1615,6 +1617,8 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, return ERR_PTR(rc); } + + /** * Enqueue (request) a lock. * On the client this is called from ldlm_cli_enqueue_fini -- GitLab From 62079c8371aaa08f072a3e41628e753f9696d6df Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 23 Mar 2018 10:58:31 -0700 Subject: [PATCH 0553/1291] serial: core: Make sure compiler barfs for 16-byte earlycon names [ Upstream commit c1c734cb1f54b062f7e67ffc9656d82f5b412b9c ] As part of bringup I ended up wanting to call an earlycon driver by a name that was exactly 16-bytes big, specifically "qcom_geni_serial". Unfortunately, when I tried this I found that things compiled just fine. They just didn't work. Specifically the compiler felt perfectly justified in initting the ".name" field of "struct earlycon_id" with the full 16-bytes and just skipping the '\0'. Needless to say, that behavior didn't seem ideal, but I guess someone must have allowed it for a reason. One way to fix this is to shorten the name field to 15 bytes and then add an extra byte after that nobody touches. This should always be initted to 0 and we're golden. There are, of course, other ways to fix this too. We could audit all the users of the "name" field and make them stop at both null termination or at 16 bytes. We could also just make the name field much bigger so that we're not likely to run into this. ...but both seem like we'll just hit the bug again. Signed-off-by: Douglas Anderson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 74fc82d22310..868b60a79c0b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -348,7 +348,8 @@ struct earlycon_device { }; struct earlycon_id { - char name[16]; + char name[15]; + char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; -- GitLab From 6cfd0d3c627f845ca6bcc560a57c0bb2632e4ebf Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 10 Apr 2018 11:32:09 -0700 Subject: [PATCH 0554/1291] soc: imx: gpcv2: Do not pass static memory as platform data [ Upstream commit 050f810e238f268670f14a8f8b793ba2dbf2e92f ] Platform device core assumes the ownership of dev.platform_data as well as that it is dynamically allocated and it will try to kfree it as a part of platform_device_release(). Change the code to use platform_device_add_data() n instead of a pointer to a static memory to avoid causing a BUG() when calling platform_device_put(). The problem can be reproduced by artificially enabling the error path of platform_device_add() call (around line 357). Note that this change also allows us to constify imx7_pgc_domains, since we no longer need to be able to modify it. Cc: Stefan Agner Cc: Lucas Stach Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Andrey Smirnov Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/soc/imx/gpcv2.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index afc7ecc3c187..f4e3bd40c72e 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -155,7 +155,7 @@ static int imx7_gpc_pu_pgc_sw_pdn_req(struct generic_pm_domain *genpd) return imx7_gpc_pu_pgc_sw_pxx_req(genpd, false); } -static struct imx7_pgc_domain imx7_pgc_domains[] = { +static const struct imx7_pgc_domain imx7_pgc_domains[] = { [IMX7_POWER_DOMAIN_MIPI_PHY] = { .genpd = { .name = "mipi-phy", @@ -321,11 +321,6 @@ static int imx_gpcv2_probe(struct platform_device *pdev) continue; } - domain = &imx7_pgc_domains[domain_index]; - domain->regmap = regmap; - domain->genpd.power_on = imx7_gpc_pu_pgc_sw_pup_req; - domain->genpd.power_off = imx7_gpc_pu_pgc_sw_pdn_req; - pd_pdev = platform_device_alloc("imx7-pgc-domain", domain_index); if (!pd_pdev) { @@ -334,7 +329,20 @@ static int imx_gpcv2_probe(struct platform_device *pdev) return -ENOMEM; } - pd_pdev->dev.platform_data = domain; + ret = platform_device_add_data(pd_pdev, + &imx7_pgc_domains[domain_index], + sizeof(imx7_pgc_domains[domain_index])); + if (ret) { + platform_device_put(pd_pdev); + of_node_put(np); + return ret; + } + + domain = pd_pdev->dev.platform_data; + domain->regmap = regmap; + domain->genpd.power_on = imx7_gpc_pu_pgc_sw_pup_req; + domain->genpd.power_off = imx7_gpc_pu_pgc_sw_pdn_req; + pd_pdev->dev.parent = dev; pd_pdev->dev.of_node = np; -- GitLab From d136b7ab24b8df3136f96babe0b4e369c1e6c120 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 10 Apr 2018 15:05:42 +0200 Subject: [PATCH 0555/1291] microblaze: Fix simpleImage format generation [ Upstream commit ece97f3a5fb50cf5f98886fbc63c9665f2bb199d ] simpleImage generation was broken for some time. This patch is fixing steps how simpleImage.*.ub file is generated. Steps are objdump of vmlinux and create .ub. Also make sure that there is striped elf version with .strip suffix. Signed-off-by: Michal Simek Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/microblaze/boot/Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index 47f94cc383b6..7c2f52d4a0e4 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -22,17 +22,19 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE quiet_cmd_cp = CP $< $@$2 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) -quiet_cmd_strip = STRIP $@ +quiet_cmd_strip = STRIP $< $@$2 cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \ - -K _fdt_start vmlinux -o $@ + -K _fdt_start $< -o $@$2 UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR) +UIMAGE_IN = $@ +UIMAGE_OUT = $@.ub $(obj)/simpleImage.%: vmlinux FORCE $(call if_changed,cp,.unstrip) $(call if_changed,objcopy) $(call if_changed,uimage) - $(call if_changed,strip) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + $(call if_changed,strip,.strip) + @echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')' clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb -- GitLab From f17d397bfed4371bbefcb1a9d497ea9cd057df3e Mon Sep 17 00:00:00 2001 From: Dominik Bozek Date: Fri, 13 Apr 2018 10:42:31 -0700 Subject: [PATCH 0556/1291] usb: hub: Don't wait for connect state at resume for powered-off ports [ Upstream commit 5d111f5190848d6fb1c414dc57797efea3526a2f ] wait_for_connected() wait till a port change status to USB_PORT_STAT_CONNECTION, but this is not possible if the port is unpowered. The loop will only exit at timeout. Such case take place if an over-current incident happen while system is in S3. Then during resume wait_for_connected() will wait 2s, which may be noticeable by the user. Signed-off-by: Dominik Bozek Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a8bc48b26c23..a9db0887edca 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3361,6 +3361,10 @@ static int wait_for_connected(struct usb_device *udev, while (delay_ms < 2000) { if (status || *portstatus & USB_PORT_STAT_CONNECTION) break; + if (!port_is_power_on(hub, *portstatus)) { + status = -ENODEV; + break; + } msleep(20); delay_ms += 20; status = hub_port_status(hub, *port1, portstatus, portchange); -- GitLab From a07fc8dd2bd4f790940c9a5ac3b7fae1e6738800 Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Tue, 3 Apr 2018 09:39:01 +0300 Subject: [PATCH 0557/1291] crypto: authencesn - don't leak pointers to authenc keys [ Upstream commit 31545df391d58a3bb60e29b1192644a6f2b5a8dd ] In crypto_authenc_esn_setkey we save pointers to the authenc keys in a local variable of type struct crypto_authenc_keys and we don't zeroize it after use. Fix this and don't leak pointers to the authenc keys. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/authencesn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 0cf5fefdb859..6de852ce4cf8 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * CRYPTO_TFM_RES_MASK); out: + memzero_explicit(&keys, sizeof(keys)); return err; badkey: -- GitLab From 09740f513e3fab823bce5daecc6caf00f1d0b82c Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Tue, 3 Apr 2018 09:39:00 +0300 Subject: [PATCH 0558/1291] crypto: authenc - don't leak pointers to authenc keys [ Upstream commit ad2fdcdf75d169e7a5aec6c7cb421c0bec8ec711 ] In crypto_authenc_setkey we save pointers to the authenc keys in a local variable of type struct crypto_authenc_keys and we don't zeroize it after use. Fix this and don't leak pointers to the authenc keys. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/authenc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/authenc.c b/crypto/authenc.c index 875470b0e026..0db344d5a01a 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, CRYPTO_TFM_RES_MASK); out: + memzero_explicit(&keys, sizeof(keys)); return err; badkey: -- GitLab From 4886bf00337f4c70a2bffc0e2e5cdbf813342a95 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 14 Mar 2018 11:41:36 -0400 Subject: [PATCH 0559/1291] media: omap3isp: fix unbalanced dma_iommu_mapping [ Upstream commit b7e1e6859fbf60519fd82d7120cee106a6019512 ] The OMAP3 ISP driver manages its MMU mappings through the IOMMU-aware ARM DMA backend. The current code creates a dma_iommu_mapping and attaches this to the ISP device, but never detaches the mapping in either the probe failure paths or the driver remove path resulting in an unbalanced mapping refcount and a memory leak. Fix this properly. Reported-by: Pavel Machek Signed-off-by: Suman Anna Tested-by: Pavel Machek Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/omap3isp/isp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 1a428fe9f070..9f023bc6e1b7 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1945,6 +1945,7 @@ static int isp_initialize_modules(struct isp_device *isp) static void isp_detach_iommu(struct isp_device *isp) { + arm_iommu_detach_device(isp->dev); arm_iommu_release_mapping(isp->mapping); isp->mapping = NULL; } @@ -1961,8 +1962,7 @@ static int isp_attach_iommu(struct isp_device *isp) mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(isp->dev, "failed to create ARM IOMMU mapping\n"); - ret = PTR_ERR(mapping); - goto error; + return PTR_ERR(mapping); } isp->mapping = mapping; @@ -1977,7 +1977,8 @@ static int isp_attach_iommu(struct isp_device *isp) return 0; error: - isp_detach_iommu(isp); + arm_iommu_release_mapping(isp->mapping); + isp->mapping = NULL; return ret; } -- GitLab From fb2b60e27a16c4bd5054bc518ec3d786036b7dce Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 18 Apr 2018 08:54:18 -0700 Subject: [PATCH 0560/1291] regulator: Don't return or expect -errno from of_map_mode() [ Upstream commit 02f3703934a42417021405ef336fe45add13c3d1 ] In of_get_regulation_constraints() we were taking the result of of_map_mode() (an unsigned int) and assigning it to an int. We were then checking whether this value was -EINVAL. Some implementers of of_map_mode() were returning -EINVAL (even though the return type of their function needed to be unsigned int) because they needed to signal an error back to of_get_regulation_constraints(). In general in the regulator framework the mode is always referred to as an unsigned int. While we could fix this to be a signed int (the highest value we store in there right now is 0x8), it's actually pretty clean to just define the regulator mode 0x0 (the lack of any bits set) as an invalid mode. Let's do that. Fixes: 5e5e3a42c653 ("regulator: of: Add support for parsing initial and suspend modes") Suggested-by: Javier Martinez Canillas Signed-off-by: Douglas Anderson Reviewed-by: Javier Martinez Canillas Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/cpcap-regulator.c | 2 +- drivers/regulator/of_regulator.c | 13 +++++++------ drivers/regulator/twl-regulator.c | 2 +- include/linux/regulator/consumer.h | 1 + 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/cpcap-regulator.c b/drivers/regulator/cpcap-regulator.c index f541b80f1b54..bd910fe123d9 100644 --- a/drivers/regulator/cpcap-regulator.c +++ b/drivers/regulator/cpcap-regulator.c @@ -222,7 +222,7 @@ static unsigned int cpcap_map_mode(unsigned int mode) case CPCAP_BIT_AUDIO_LOW_PWR: return REGULATOR_MODE_STANDBY; default: - return -EINVAL; + return REGULATOR_MODE_INVALID; } } diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index c9875355905d..a3bf7c993723 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -31,6 +31,7 @@ static void of_get_regulation_constraints(struct device_node *np, struct regulation_constraints *constraints = &(*init_data)->constraints; struct regulator_state *suspend_state; struct device_node *suspend_np; + unsigned int mode; int ret, i; u32 pval; @@ -124,11 +125,11 @@ static void of_get_regulation_constraints(struct device_node *np, if (!of_property_read_u32(np, "regulator-initial-mode", &pval)) { if (desc && desc->of_map_mode) { - ret = desc->of_map_mode(pval); - if (ret == -EINVAL) + mode = desc->of_map_mode(pval); + if (mode == REGULATOR_MODE_INVALID) pr_err("%s: invalid mode %u\n", np->name, pval); else - constraints->initial_mode = ret; + constraints->initial_mode = mode; } else { pr_warn("%s: mapping for mode %d not defined\n", np->name, pval); @@ -163,12 +164,12 @@ static void of_get_regulation_constraints(struct device_node *np, if (!of_property_read_u32(suspend_np, "regulator-mode", &pval)) { if (desc && desc->of_map_mode) { - ret = desc->of_map_mode(pval); - if (ret == -EINVAL) + mode = desc->of_map_mode(pval); + if (mode == REGULATOR_MODE_INVALID) pr_err("%s: invalid mode %u\n", np->name, pval); else - suspend_state->mode = ret; + suspend_state->mode = mode; } else { pr_warn("%s: mapping for mode %d not defined\n", np->name, pval); diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index a4456db5849d..884c7505ed91 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -274,7 +274,7 @@ static inline unsigned int twl4030reg_map_mode(unsigned int mode) case RES_STATE_SLEEP: return REGULATOR_MODE_STANDBY; default: - return -EINVAL; + return REGULATOR_MODE_INVALID; } } diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index df176d7c2b87..25602afd4844 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -80,6 +80,7 @@ struct regmap; * These modes can be OR'ed together to make up a mask of valid register modes. */ +#define REGULATOR_MODE_INVALID 0x0 #define REGULATOR_MODE_FAST 0x1 #define REGULATOR_MODE_NORMAL 0x2 #define REGULATOR_MODE_IDLE 0x4 -- GitLab From 0b45eb5a340d100856721c8ba9a49e29e655d7f7 Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 7 Apr 2018 00:47:23 +0200 Subject: [PATCH 0561/1291] scsi: scsi_dh: replace too broad "TP9" string with the exact models [ Upstream commit 37b37d2609cb0ac267280ef27350b962d16d272e ] SGI/TP9100 is not an RDAC array: ^^^ https://git.opensvc.com/gitweb.cgi?p=multipath-tools/.git;a=blob;f=libmultipath/hwtable.c;h=88b4700beb1d8940008020fbe4c3cd97d62f4a56;hb=HEAD#l235 This partially reverts commit 35204772ea03 ("[SCSI] scsi_dh_rdac : Consolidate rdac strings together") [mkp: fixed up the new entries to align with rest of struct] Cc: NetApp RDAC team Cc: Hannes Reinecke Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_dh.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index a5e30e9449ef..375cede0c534 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -58,7 +58,10 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"IBM", "3526", "rdac", }, {"IBM", "3542", "rdac", }, {"IBM", "3552", "rdac", }, - {"SGI", "TP9", "rdac", }, + {"SGI", "TP9300", "rdac", }, + {"SGI", "TP9400", "rdac", }, + {"SGI", "TP9500", "rdac", }, + {"SGI", "TP9700", "rdac", }, {"SGI", "IS", "rdac", }, {"STK", "OPENstorage", "rdac", }, {"STK", "FLEXLINE 380", "rdac", }, -- GitLab From 07b2a0d0018381c7b9760e6d3ceb72f683ad46ba Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 6 Apr 2018 02:02:11 -0700 Subject: [PATCH 0562/1291] scsi: megaraid_sas: Increase timeout by 1 sec for non-RAID fastpath IOs [ Upstream commit 3239b8cd28fd849a2023483257d35d68c5876c74 ] Hardware could time out Fastpath IOs one second earlier than the timeout provided by the host. For non-RAID devices, driver provides timeout value based on OS provided timeout value. Under certain scenarios, if the OS provides a timeout value of 1 second, due to above behavior hardware will timeout immediately. Increase timeout value for non-RAID fastpath IOs by 1 second. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index d8f626567f59..06a2e3d9fc5b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2677,6 +2677,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeout_value = cpu_to_le16(os_timeout_value); pRAID_Context->virtual_disk_tgt_id = cpu_to_le16(device_id); } else { + if (os_timeout_value) + os_timeout_value++; + /* system pd Fast Path */ io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; timeout_limit = (scmd->device->type == TYPE_DISK) ? -- GitLab From bb7cccb01c84e0b7c0a4d43fa7b25510e56e92df Mon Sep 17 00:00:00 2001 From: Uma Krishnan Date: Mon, 26 Mar 2018 11:35:27 -0500 Subject: [PATCH 0563/1291] scsi: cxlflash: Synchronize reset and remove ops [ Upstream commit a3feb6ef50def7c91244d7bd15a3625b7b49b81f ] The following Oops can be encountered if a device removal or system shutdown is initiated while an EEH recovery is in process: [c000000ff2f479c0] c008000015256f18 cxlflash_pci_slot_reset+0xa0/0x100 [cxlflash] [c000000ff2f47a30] c00800000dae22e0 cxl_pci_slot_reset+0x168/0x290 [cxl] [c000000ff2f47ae0] c00000000003ef1c eeh_report_reset+0xec/0x170 [c000000ff2f47b20] c00000000003d0b8 eeh_pe_dev_traverse+0x98/0x170 [c000000ff2f47bb0] c00000000003f80c eeh_handle_normal_event+0x56c/0x580 [c000000ff2f47c60] c00000000003fba4 eeh_handle_event+0x2a4/0x338 [c000000ff2f47d10] c0000000000400b8 eeh_event_handler+0x1f8/0x200 [c000000ff2f47dc0] c00000000013da48 kthread+0x1a8/0x1b0 [c000000ff2f47e30] c00000000000b528 ret_from_kernel_thread+0x5c/0xb4 The remove handler frees AFU memory while the EEH recovery is in progress, leading to a race condition. This can result in a crash if the recovery thread tries to access this memory. To resolve this issue, the cxlflash remove handler will evaluate the device state and yield to any active reset or probing threads. Signed-off-by: Uma Krishnan Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 0b6467206f8e..12faf7f6c8ac 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -946,9 +946,9 @@ static void cxlflash_remove(struct pci_dev *pdev) return; } - /* If a Task Management Function is active, wait for it to complete - * before continuing with remove. - */ + /* Yield to running recovery threads before continuing with remove */ + wait_event(cfg->reset_waitq, cfg->state != STATE_RESET && + cfg->state != STATE_PROBING); spin_lock_irqsave(&cfg->tmf_slock, lock_flags); if (cfg->tmf_active) wait_event_interruptible_lock_irq(cfg->tmf_waitq, -- GitLab From a5ed99bc6f59803e58b59bc1c84444f77b6b7712 Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Mon, 26 Mar 2018 11:30:22 -0500 Subject: [PATCH 0564/1291] scsi: cxlflash: Avoid clobbering context control register value [ Upstream commit 465891fe9237b02f8d0fd26448f733fae7236f4a ] The SISLite specification originally defined the context control register with a single field of bits to represent the LISN and also stipulated that the register reset value be 0. The cxlflash driver took advantage of this when programming the LISN for the master contexts via an unconditional write - no other bits were preserved. When unmap support was added, SISLite was updated to define bit 0 of the context control register as a way for the AFU to notify the context owner that unmap operations were supported. Thus the assumptions under which the register is setup changed and the existing unconditional write is clobbering the unmap state for master contexts. This is presently not an issue due to the order in which the context control register is programmed in relation to the unmap bit being queried but should be addressed to avoid a future regression in the event this code is moved elsewhere. To remedy this issue, preserve the bits when programming the LISN field in the context control register. Since the LISN will now be programmed using a read value, assert that the initial state of the LISN field is as described in SISLite (0). Signed-off-by: Matthew R. Ochs Signed-off-by: Uma Krishnan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 5 ++++- drivers/scsi/cxlflash/sislite.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 12faf7f6c8ac..737314cac8d8 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -1303,7 +1303,10 @@ static void afu_err_intr_init(struct afu *afu) for (i = 0; i < afu->num_hwqs; i++) { hwq = get_hwq(afu, i); - writeq_be(SISL_MSI_SYNC_ERROR, &hwq->host_map->ctx_ctrl); + reg = readq_be(&hwq->host_map->ctx_ctrl); + WARN_ON((reg & SISL_CTX_CTRL_LISN_MASK) != 0); + reg |= SISL_MSI_SYNC_ERROR; + writeq_be(reg, &hwq->host_map->ctx_ctrl); writeq_be(SISL_ISTATUS_MASK, &hwq->host_map->intr_mask); } } diff --git a/drivers/scsi/cxlflash/sislite.h b/drivers/scsi/cxlflash/sislite.h index 09daa86670fc..0892fb1f0a1e 100644 --- a/drivers/scsi/cxlflash/sislite.h +++ b/drivers/scsi/cxlflash/sislite.h @@ -284,6 +284,7 @@ struct sisl_host_map { __be64 cmd_room; __be64 ctx_ctrl; /* least significant byte or b56:63 is LISN# */ #define SISL_CTX_CTRL_UNMAP_SECTOR 0x8000000000000000ULL /* b0 */ +#define SISL_CTX_CTRL_LISN_MASK (0xFFULL) __be64 mbox_w; /* restricted use */ __be64 sq_start; /* Submission Queue (R/W): write sequence and */ __be64 sq_end; /* inclusion semantics are the same as RRQ */ -- GitLab From 98121d665d93f306d832ac635fd77acdfeb41302 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 12 Apr 2018 05:31:59 -0400 Subject: [PATCH 0565/1291] media: atomisp: compat32: fix __user annotations [ Upstream commit ad4222a0e29664666a71685a6e732923ca7c7e45 ] The __user annotations at the compat32 code is not right: drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:81:18: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:81:18: expected void *base drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:81:18: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:232:23: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:232:23: expected unsigned int [usertype] *xcoords_y drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:232:23: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:233:23: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:233:23: expected unsigned int [usertype] *ycoords_y drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:233:23: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:234:24: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:234:24: expected unsigned int [usertype] *xcoords_uv drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:234:24: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:235:24: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:235:24: expected unsigned int [usertype] *ycoords_uv drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:235:24: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:296:29: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:296:29: expected unsigned int [usertype] *effective_width drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:296:29: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:360:29: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:360:29: expected unsigned int [usertype] *effective_width drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:360:29: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:437:19: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:437:19: expected struct v4l2_framebuffer *frame drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:437:19: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:481:29: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:481:29: expected unsigned short *calb_grp_values drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:481:29: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:701:39: warning: cast removes address space of expression drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:704:21: warning: incorrect type in argument 1 (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:704:21: expected void const volatile [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:704:21: got unsigned int [usertype] *src drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:737:43: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:737:43: expected struct atomisp_shading_table *shading_table drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:737:43: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:742:44: warning: incorrect type in argument 1 (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:742:44: expected void [noderef] *to drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:742:44: got struct atomisp_shading_table *shading_table drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:755:41: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:755:41: expected struct atomisp_morph_table *morph_table drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:755:41: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:760:44: warning: incorrect type in argument 1 (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:760:44: expected void [noderef] *to drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:760:44: got struct atomisp_morph_table *morph_table drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:772:40: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:772:40: expected struct atomisp_dvs2_coefficients *dvs2_coefs drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:772:40: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:777:44: warning: incorrect type in argument 1 (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:777:44: expected void [noderef] *to drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:777:44: got struct atomisp_dvs2_coefficients *dvs2_coefs drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:788:46: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:788:46: expected struct atomisp_dvs_6axis_config *dvs_6axis_config drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:788:46: got void [noderef] * drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:793:44: warning: incorrect type in argument 1 (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:793:44: expected void [noderef] *to drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:793:44: got struct atomisp_dvs_6axis_config *dvs_6axis_config drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:853:17: warning: incorrect type in assignment (different address spaces) drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:853:17: expected struct atomisp_sensor_ae_bracketing_lut_entry *lut drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c:853:17: got void [noderef] * Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../pci/atomisp2/atomisp_compat_ioctl32.c | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c index 0592ac1f2832..cfe6bb610014 100644 --- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c +++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.c @@ -81,7 +81,7 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, get_user(kp->flags, &up->flags)) return -EFAULT; - kp->base = compat_ptr(tmp); + kp->base = (void __force *)compat_ptr(tmp); get_v4l2_pix_format((struct v4l2_pix_format *)&kp->fmt, &up->fmt); return 0; } @@ -232,10 +232,10 @@ static int get_atomisp_dvs_6axis_config32(struct atomisp_dvs_6axis_config *kp, get_user(ycoords_uv, &up->ycoords_uv)) return -EFAULT; - kp->xcoords_y = compat_ptr(xcoords_y); - kp->ycoords_y = compat_ptr(ycoords_y); - kp->xcoords_uv = compat_ptr(xcoords_uv); - kp->ycoords_uv = compat_ptr(ycoords_uv); + kp->xcoords_y = (void __force *)compat_ptr(xcoords_y); + kp->ycoords_y = (void __force *)compat_ptr(ycoords_y); + kp->xcoords_uv = (void __force *)compat_ptr(xcoords_uv); + kp->ycoords_uv = (void __force *)compat_ptr(ycoords_uv); return 0; } @@ -296,7 +296,7 @@ static int get_atomisp_metadata_stat32(struct atomisp_metadata *kp, return -EFAULT; kp->data = compat_ptr(data); - kp->effective_width = compat_ptr(effective_width); + kp->effective_width = (void __force *)compat_ptr(effective_width); return 0; } @@ -360,7 +360,7 @@ static int get_atomisp_metadata_by_type_stat32( return -EFAULT; kp->data = compat_ptr(data); - kp->effective_width = compat_ptr(effective_width); + kp->effective_width = (void __force *)compat_ptr(effective_width); return 0; } @@ -437,7 +437,7 @@ static int get_atomisp_overlay32(struct atomisp_overlay *kp, get_user(kp->overlay_start_x, &up->overlay_start_y)) return -EFAULT; - kp->frame = compat_ptr(frame); + kp->frame = (void __force *)compat_ptr(frame); return 0; } @@ -481,7 +481,7 @@ static int get_atomisp_calibration_group32( get_user(calb_grp_values, &up->calb_grp_values)) return -EFAULT; - kp->calb_grp_values = compat_ptr(calb_grp_values); + kp->calb_grp_values = (void __force *)compat_ptr(calb_grp_values); return 0; } @@ -703,8 +703,8 @@ static int get_atomisp_parameters32(struct atomisp_parameters *kp, return -EFAULT; while (n >= 0) { - compat_uptr_t *src = (compat_uptr_t *)up + n; - uintptr_t *dst = (uintptr_t *)kp + n; + compat_uptr_t __user *src = ((compat_uptr_t __user *)up) + n; + uintptr_t *dst = ((uintptr_t *)kp) + n; if (get_user((*dst), src)) return -EFAULT; @@ -751,12 +751,12 @@ static int get_atomisp_parameters32(struct atomisp_parameters *kp, #endif return -EFAULT; - kp->shading_table = user_ptr + offset; + kp->shading_table = (void __force *)user_ptr + offset; offset = sizeof(struct atomisp_shading_table); if (!kp->shading_table) return -EFAULT; - if (copy_to_user(kp->shading_table, + if (copy_to_user((void __user *)kp->shading_table, &karg.shading_table, sizeof(struct atomisp_shading_table))) return -EFAULT; @@ -777,13 +777,14 @@ static int get_atomisp_parameters32(struct atomisp_parameters *kp, #endif return -EFAULT; - kp->morph_table = user_ptr + offset; + kp->morph_table = (void __force *)user_ptr + offset; offset += sizeof(struct atomisp_morph_table); if (!kp->morph_table) return -EFAULT; - if (copy_to_user(kp->morph_table, &karg.morph_table, - sizeof(struct atomisp_morph_table))) + if (copy_to_user((void __user *)kp->morph_table, + &karg.morph_table, + sizeof(struct atomisp_morph_table))) return -EFAULT; } @@ -802,13 +803,14 @@ static int get_atomisp_parameters32(struct atomisp_parameters *kp, #endif return -EFAULT; - kp->dvs2_coefs = user_ptr + offset; + kp->dvs2_coefs = (void __force *)user_ptr + offset; offset += sizeof(struct atomisp_dis_coefficients); if (!kp->dvs2_coefs) return -EFAULT; - if (copy_to_user(kp->dvs2_coefs, &karg.dvs2_coefs, - sizeof(struct atomisp_dis_coefficients))) + if (copy_to_user((void __user *)kp->dvs2_coefs, + &karg.dvs2_coefs, + sizeof(struct atomisp_dis_coefficients))) return -EFAULT; } /* handle dvs 6axis configuration */ @@ -826,13 +828,14 @@ static int get_atomisp_parameters32(struct atomisp_parameters *kp, #endif return -EFAULT; - kp->dvs_6axis_config = user_ptr + offset; + kp->dvs_6axis_config = (void __force *)user_ptr + offset; offset += sizeof(struct atomisp_dvs_6axis_config); if (!kp->dvs_6axis_config) return -EFAULT; - if (copy_to_user(kp->dvs_6axis_config, &karg.dvs_6axis_config, - sizeof(struct atomisp_dvs_6axis_config))) + if (copy_to_user((void __user *)kp->dvs_6axis_config, + &karg.dvs_6axis_config, + sizeof(struct atomisp_dvs_6axis_config))) return -EFAULT; } } @@ -891,7 +894,7 @@ static int get_atomisp_sensor_ae_bracketing_lut( get_user(lut, &up->lut)) return -EFAULT; - kp->lut = compat_ptr(lut); + kp->lut = (void __force *)compat_ptr(lut); return 0; } -- GitLab From e1d4f1e2856ee76e0d12141da2737a87875037fa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Apr 2018 07:54:51 -0400 Subject: [PATCH 0566/1291] media: si470x: fix __be16 annotations [ Upstream commit 90db5c829692a0a7845e977e45719b4699216bd4 ] The annotations there are wrong as warned: drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: warning: incorrect type in assignment (different base types) drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: expected unsigned short [unsigned] [short] drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: got restricted __be16 [usertype] drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/radio/si470x/radio-si470x-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index b3034f80163f..8ce6f9cff746 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -92,7 +92,7 @@ MODULE_PARM_DESC(max_rds_errors, "RDS maximum block errors: *1*"); */ int si470x_get_register(struct si470x_device *radio, int regnr) { - u16 buf[READ_REG_NUM]; + __be16 buf[READ_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, @@ -117,7 +117,7 @@ int si470x_get_register(struct si470x_device *radio, int regnr) int si470x_set_register(struct si470x_device *radio, int regnr) { int i; - u16 buf[WRITE_REG_NUM]; + __be16 buf[WRITE_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, @@ -147,7 +147,7 @@ int si470x_set_register(struct si470x_device *radio, int regnr) static int si470x_get_all_registers(struct si470x_device *radio) { int i; - u16 buf[READ_REG_NUM]; + __be16 buf[READ_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, -- GitLab From b754906cd9c9e7dce29e38a744a61b28dc161e5f Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Wed, 4 Apr 2018 06:19:37 +0200 Subject: [PATCH 0567/1291] ASoC: topology: Fix bclk and fsync inversion in set_link_hw_format() [ Upstream commit a941e2fab3207cb0d57dc4ec47b1b12c8ea78b84 ] The values of bclk and fsync are inverted WRT the codec. But the existing solution already works for Broadwell, see the alsa-lib config: `alsa-lib/src/conf/topology/broadwell/broadwell.conf` This commit provides the backwards-compatible solution to fix this misuse. Signed-off-by: Kirill Marinushkin Reviewed-by: Pierre-Louis Bossart Tested-by: Pan Xiuli Tested-by: Pierre-Louis Bossart Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Mark Brown Cc: Liam Girdwood Cc: linux-kernel@vger.kernel.org Cc: alsa-devel@alsa-project.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/sound/asoc.h | 16 ++++++++++++++-- sound/soc/soc-topology.c | 12 +++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 69c37ecbff7e..f0e5e21efa54 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -160,6 +160,18 @@ #define SND_SOC_TPLG_LNK_FLGBIT_SYMMETRIC_SAMPLEBITS (1 << 2) #define SND_SOC_TPLG_LNK_FLGBIT_VOICE_WAKEUP (1 << 3) +/* DAI topology BCLK parameter + * For the backwards capability, by default codec is bclk master + */ +#define SND_SOC_TPLG_BCLK_CM 0 /* codec is bclk master */ +#define SND_SOC_TPLG_BCLK_CS 1 /* codec is bclk slave */ + +/* DAI topology FSYNC parameter + * For the backwards capability, by default codec is fsync master + */ +#define SND_SOC_TPLG_FSYNC_CM 0 /* codec is fsync master */ +#define SND_SOC_TPLG_FSYNC_CS 1 /* codec is fsync slave */ + /* * Block Header. * This header precedes all object and object arrays below. @@ -315,8 +327,8 @@ struct snd_soc_tplg_hw_config { __u8 clock_gated; /* 1 if clock can be gated to save power */ __u8 invert_bclk; /* 1 for inverted BCLK, 0 for normal */ __u8 invert_fsync; /* 1 for inverted frame clock, 0 for normal */ - __u8 bclk_master; /* 1 for master of BCLK, 0 for slave */ - __u8 fsync_master; /* 1 for master of FSYNC, 0 for slave */ + __u8 bclk_master; /* SND_SOC_TPLG_BCLK_ value */ + __u8 fsync_master; /* SND_SOC_TPLG_FSYNC_ value */ __u8 mclk_direction; /* 0 for input, 1 for output */ __le16 reserved; /* for 32bit alignment */ __le32 mclk_rate; /* MCLK or SYSCLK freqency in Hz */ diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 30cdad2eab7f..5e74844b0eb7 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2010,13 +2010,15 @@ static void set_link_hw_format(struct snd_soc_dai_link *link, link->dai_fmt |= SND_SOC_DAIFMT_IB_IF; /* clock masters */ - bclk_master = hw_config->bclk_master; - fsync_master = hw_config->fsync_master; - if (!bclk_master && !fsync_master) + bclk_master = (hw_config->bclk_master == + SND_SOC_TPLG_BCLK_CM); + fsync_master = (hw_config->fsync_master == + SND_SOC_TPLG_FSYNC_CM); + if (bclk_master && fsync_master) link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; - else if (bclk_master && !fsync_master) - link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFM; else if (!bclk_master && fsync_master) + link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFM; + else if (bclk_master && !fsync_master) link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFS; else link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; -- GitLab From a2fdb85a15eb060dbe64201d6932e6c72379bcef Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Wed, 4 Apr 2018 06:19:38 +0200 Subject: [PATCH 0568/1291] ASoC: topology: Add missing clock gating parameter when parsing hw_configs [ Upstream commit 933e1c4a667103c4d10ebdc9505a0a6abd8c3fbd ] Clock gating parameter is a part of `dai_fmt`. It is supported by `alsa-lib` when creating a topology binary file, but ignored by kernel when loading this topology file. After applying this commit, the clock gating parameter is not ignored any more. This solution is backwards compatible. The existing behaviour is not broken, because by default the parameter value is 0 and is ignored. snd_soc_tplg_hw_config.clock_gated = 0 => no effect snd_soc_tplg_hw_config.clock_gated = 1 => SND_SOC_DAIFMT_GATED snd_soc_tplg_hw_config.clock_gated = 2 => SND_SOC_DAIFMT_CONT For example, the following config, based on alsa-lib/src/conf/topology/broadwell/broadwell.conf, is now supported: ~~~~ SectionHWConfig."CodecHWConfig" { id "1" format "I2S" # physical audio format. pm_gate_clocks "true" # clock can be gated } SectionLink."Codec" { # used for binding to the physical link id "0" hw_configs [ "CodecHWConfig" ] default_hw_conf_id "1" } ~~~~ Signed-off-by: Kirill Marinushkin Reviewed-by: Pierre-Louis Bossart Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Mark Brown Cc: Pan Xiuli Cc: Liam Girdwood Cc: linux-kernel@vger.kernel.org Cc: alsa-devel@alsa-project.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/sound/asoc.h | 7 ++++++- sound/soc/soc-topology.c | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index f0e5e21efa54..f3c4b46e39d8 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -139,6 +139,11 @@ #define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_CHANNELS (1 << 1) #define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_SAMPLEBITS (1 << 2) +/* DAI clock gating */ +#define SND_SOC_TPLG_DAI_CLK_GATE_UNDEFINED 0 +#define SND_SOC_TPLG_DAI_CLK_GATE_GATED 1 +#define SND_SOC_TPLG_DAI_CLK_GATE_CONT 2 + /* DAI physical PCM data formats. * Add new formats to the end of the list. */ @@ -324,7 +329,7 @@ struct snd_soc_tplg_hw_config { __le32 size; /* in bytes of this structure */ __le32 id; /* unique ID - - used to match */ __le32 fmt; /* SND_SOC_DAI_FORMAT_ format value */ - __u8 clock_gated; /* 1 if clock can be gated to save power */ + __u8 clock_gated; /* SND_SOC_TPLG_DAI_CLK_GATE_ value */ __u8 invert_bclk; /* 1 for inverted BCLK, 0 for normal */ __u8 invert_fsync; /* 1 for inverted frame clock, 0 for normal */ __u8 bclk_master; /* SND_SOC_TPLG_BCLK_ value */ diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 5e74844b0eb7..c1619860a5de 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1997,6 +1997,13 @@ static void set_link_hw_format(struct snd_soc_dai_link *link, link->dai_fmt = hw_config->fmt & SND_SOC_DAIFMT_FORMAT_MASK; + /* clock gating */ + if (hw_config->clock_gated == SND_SOC_TPLG_DAI_CLK_GATE_GATED) + link->dai_fmt |= SND_SOC_DAIFMT_GATED; + else if (hw_config->clock_gated == + SND_SOC_TPLG_DAI_CLK_GATE_CONT) + link->dai_fmt |= SND_SOC_DAIFMT_CONT; + /* clock signal polarity */ invert_bclk = hw_config->invert_bclk; invert_fsync = hw_config->invert_fsync; -- GitLab From 20f01a1b7b6d00b8b440c57b124a10606e209271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 28 Mar 2018 15:30:37 -0700 Subject: [PATCH 0569/1291] drm: Add DP PSR2 sink enable bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4f212e40468650e220c1770876c7f25b8e0c1ff5 ] To comply with eDP1.4a this bit should be set when enabling PSR2. Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180328223046.16125-1-jose.souza@intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index b17476a6909c..8fd7cb5297ab 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -453,6 +453,7 @@ # define DP_PSR_FRAME_CAPTURE (1 << 3) # define DP_PSR_SELECTIVE_UPDATE (1 << 4) # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS (1 << 5) +# define DP_PSR_ENABLE_PSR2 (1 << 6) /* eDP 1.4a */ #define DP_ADAPTER_CTRL 0x1a0 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE (1 << 0) -- GitLab From 2aa0e652bce105ca0d5544e887ed7224e0339a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 Mar 2018 17:22:51 +0200 Subject: [PATCH 0570/1291] drm/atomic-helper: Drop plane->fb references only for drm_atomic_helper_shutdown() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5e9cfeba6abb7e1a3f240bd24eb29178f0b83716 ] drm_atomic_helper_shutdown() needs to release the reference held by plane->fb. Since commit 49d70aeaeca8 ("drm/atomic-helper: Fix leak in disable_all") we're doing that by calling drm_atomic_clean_old_fb() in drm_atomic_helper_disable_all(). This also leaves plane->fb == NULL afterwards. However, since drm_atomic_helper_disable_all() is also used by the i915 gpu reset code drm_atomic_helper_commit_duplicated_state() then has to undo the damage and put the correct plane->fb pointers back in (and also adjust the ref counts to match again as well). That approach doesn't work so well for load detection as nothing sets up the plane->old_fb pointers for us. This causes us to leak an extra reference for each plane->fb when drm_atomic_helper_commit_duplicated_state() calls drm_atomic_clean_old_fb() after load detection. To fix this let's call drm_atomic_clean_old_fb() only for drm_atomic_helper_shutdown() as that's the only time we need to actually drop the plane->fb references. In all the other cases (load detection, gpu reset) we want to leave plane->fb alone. v2: Don't inflict the clean_old_fbs bool to drivers (Daniel) v3: Squash in the revert and rewrite the commit msg (Daniel) Cc: martin.peres@free.fr Cc: chris@chris-wilson.co.uk Cc: Dave Airlie Cc: Maarten Lankhorst Cc: Daniel Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180322152313.6561-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst #pre-squash Reviewed-by: Daniel Vetter Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 78 ++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 0028591f3f95..1f08d597b87a 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2683,31 +2683,9 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set, return 0; } -/** - * drm_atomic_helper_disable_all - disable all currently active outputs - * @dev: DRM device - * @ctx: lock acquisition context - * - * Loops through all connectors, finding those that aren't turned off and then - * turns them off by setting their DPMS mode to OFF and deactivating the CRTC - * that they are connected to. - * - * This is used for example in suspend/resume to disable all currently active - * functions when suspending. If you just want to shut down everything at e.g. - * driver unload, look at drm_atomic_helper_shutdown(). - * - * Note that if callers haven't already acquired all modeset locks this might - * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). - * - * Returns: - * 0 on success or a negative error code on failure. - * - * See also: - * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and - * drm_atomic_helper_shutdown(). - */ -int drm_atomic_helper_disable_all(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx) +static int __drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx, + bool clean_old_fbs) { struct drm_atomic_state *state; struct drm_connector_state *conn_state; @@ -2759,8 +2737,11 @@ int drm_atomic_helper_disable_all(struct drm_device *dev, goto free; drm_atomic_set_fb_for_plane(plane_state, NULL); - plane_mask |= BIT(drm_plane_index(plane)); - plane->old_fb = plane->fb; + + if (clean_old_fbs) { + plane->old_fb = plane->fb; + plane_mask |= BIT(drm_plane_index(plane)); + } } ret = drm_atomic_commit(state); @@ -2771,6 +2752,34 @@ int drm_atomic_helper_disable_all(struct drm_device *dev, return ret; } +/** + * drm_atomic_helper_disable_all - disable all currently active outputs + * @dev: DRM device + * @ctx: lock acquisition context + * + * Loops through all connectors, finding those that aren't turned off and then + * turns them off by setting their DPMS mode to OFF and deactivating the CRTC + * that they are connected to. + * + * This is used for example in suspend/resume to disable all currently active + * functions when suspending. If you just want to shut down everything at e.g. + * driver unload, look at drm_atomic_helper_shutdown(). + * + * Note that if callers haven't already acquired all modeset locks this might + * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). + * + * Returns: + * 0 on success or a negative error code on failure. + * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and + * drm_atomic_helper_shutdown(). + */ +int drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) +{ + return __drm_atomic_helper_disable_all(dev, ctx, false); +} EXPORT_SYMBOL(drm_atomic_helper_disable_all); /** @@ -2793,7 +2802,7 @@ void drm_atomic_helper_shutdown(struct drm_device *dev) while (1) { ret = drm_modeset_lock_all_ctx(dev, &ctx); if (!ret) - ret = drm_atomic_helper_disable_all(dev, &ctx); + ret = __drm_atomic_helper_disable_all(dev, &ctx, true); if (ret != -EDEADLK) break; @@ -2897,16 +2906,11 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; - unsigned plane_mask = 0; - struct drm_device *dev = state->dev; - int ret; state->acquire_ctx = ctx; - for_each_new_plane_in_state(state, plane, new_plane_state, i) { - plane_mask |= BIT(drm_plane_index(plane)); + for_each_new_plane_in_state(state, plane, new_plane_state, i) state->planes[i].old_state = plane->state; - } for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) state->crtcs[i].old_state = crtc->state; @@ -2914,11 +2918,7 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, for_each_new_connector_in_state(state, connector, new_conn_state, i) state->connectors[i].old_state = connector->state; - ret = drm_atomic_commit(state); - if (plane_mask) - drm_atomic_clean_old_fb(dev, plane_mask, ret); - - return ret; + return drm_atomic_commit(state); } EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state); -- GitLab From 2488689fe4684756269a961117bf348ef458d58c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Mar 2018 16:19:32 +0200 Subject: [PATCH 0571/1291] drm/dp/mst: Fix off-by-one typo when dump payload table [ Upstream commit 7056a2bccc3b5afc51f9b35b30a46f0d9219968d ] It seems there is a classical off-by-one typo from the beginning when commit ad7f8a1f9ced ("drm/helper: add Displayport multi-stream helper (v0.6)") introduced a new helper. Fix a typo by introducing a macro constant. Cc: Dave Airlie Signed-off-by: Andy Shevchenko Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180319141932.37290-1-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 41b492f99955..c022ab6e84bd 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2862,12 +2862,14 @@ static void drm_dp_mst_dump_mstb(struct seq_file *m, } } +#define DP_PAYLOAD_TABLE_SIZE 64 + static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, char *buf) { int i; - for (i = 0; i < 64; i += 16) { + for (i = 0; i < DP_PAYLOAD_TABLE_SIZE; i += 16) { if (drm_dp_dpcd_read(mgr->aux, DP_PAYLOAD_TABLE_UPDATE_STATUS + i, &buf[i], 16) != 16) @@ -2936,7 +2938,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, mutex_lock(&mgr->lock); if (mgr->mst_primary) { - u8 buf[64]; + u8 buf[DP_PAYLOAD_TABLE_SIZE]; int ret; ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE); @@ -2954,8 +2956,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, seq_printf(m, " revision: hw: %x.%x sw: %x.%x\n", buf[0x9] >> 4, buf[0x9] & 0xf, buf[0xa], buf[0xb]); if (dump_dp_payload_table(mgr, buf)) - seq_printf(m, "payload table: %*ph\n", 63, buf); - + seq_printf(m, "payload table: %*ph\n", DP_PAYLOAD_TABLE_SIZE, buf); } mutex_unlock(&mgr->lock); -- GitLab From 2258351cf0865b6367a5b2b3843ec39a146bb0f8 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 25 Jul 2018 23:15:07 +0200 Subject: [PATCH 0572/1291] block: bio_iov_iter_get_pages: fix size of last iovec commit b403ea2404889e1227812fa9657667a1deb9c694 upstream. If the last page of the bio is not "full", the length of the last vector slot needs to be corrected. This slot has the index (bio->bi_vcnt - 1), but only in bio->bi_io_vec. In the "bv" helper array, which is shifted by the value of bio->bi_vcnt at function invocation, the correct index is (nr_pages - 1). v2: improved readability following suggestions from Ming Lei. v3: followed a formatting suggestion from Christoph Hellwig. Fixes: 2cefe4dbaadf ("block: add bio_iov_iter_get_pages()") Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Martin Wilck Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/block/bio.c b/block/bio.c index 90f19d7df66c..e5df69918d1a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -881,16 +881,16 @@ EXPORT_SYMBOL(bio_add_page); */ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - size_t offset, diff; + size_t offset; ssize_t size; size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; - nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; + idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; /* * Deep magic below: We need to walk the pinned pages backwards @@ -903,17 +903,15 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) bio->bi_iter.bi_size += size; bio->bi_vcnt += nr_pages; - diff = (nr_pages * PAGE_SIZE - offset) - size; - while (nr_pages--) { - bv[nr_pages].bv_page = pages[nr_pages]; - bv[nr_pages].bv_len = PAGE_SIZE; - bv[nr_pages].bv_offset = 0; + while (idx--) { + bv[idx].bv_page = pages[idx]; + bv[idx].bv_len = PAGE_SIZE; + bv[idx].bv_offset = 0; } bv[0].bv_offset += offset; bv[0].bv_len -= offset; - if (diff) - bv[bio->bi_vcnt - 1].bv_len -= diff; + bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; iov_iter_advance(iter, size); return 0; -- GitLab From cc5d7097ba8f46a9b5382edba733e5a8705145ba Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 25 Jul 2018 23:15:08 +0200 Subject: [PATCH 0573/1291] blkdev: __blkdev_direct_IO_simple: fix leak in error case commit 9362dd1109f87a9d0a798fbc890cb339c171ed35 upstream. Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io") Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin Wilck Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 789f55e851ae..3323eec5c164 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -231,7 +231,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, ret = bio_iov_iter_get_pages(&bio, iter); if (unlikely(ret)) - return ret; + goto out; ret = bio.bi_iter.bi_size; if (iov_iter_rw(iter) == READ) { @@ -260,12 +260,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, put_page(bvec->bv_page); } - if (vecs != inline_vecs) - kfree(vecs); - if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); +out: + if (vecs != inline_vecs) + kfree(vecs); + bio_uninit(&bio); return ret; -- GitLab From b8088c524ae21dc97c86e09f0e41bb419804b5ad Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Thu, 26 Jul 2018 14:39:37 -0400 Subject: [PATCH 0574/1291] block: reset bi_iter.bi_done after splitting bio commit 5151842b9d8732d4cbfa8400b40bff894f501b2f upstream. After the bio has been updated to represent the remaining sectors, reset bi_done so bio_rewind_iter() does not rewind further than it should. This resolves a bio_integrity_process() failure on reads where the original request was split. Fixes: 63573e359d05 ("bio-integrity: Restore original iterator on verify stage") Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bio.c b/block/bio.c index e5df69918d1a..194d28cdc642 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1889,6 +1889,7 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); + bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); -- GitLab From af41fd042fc10e65e93ed009c4933fd65c67ef2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 14 Jul 2018 23:55:57 -0400 Subject: [PATCH 0575/1291] random: mix rdrand with entropy sent in from userspace commit 81e69df38e2911b642ec121dec319fad2a4782f3 upstream. Fedora has integrated the jitter entropy daemon to work around slow boot problems, especially on VM's that don't support virtio-rng: https://bugzilla.redhat.com/show_bug.cgi?id=1572944 It's understandable why they did this, but the Jitter entropy daemon works fundamentally on the principle: "the CPU microarchitecture is **so** complicated and we can't figure it out, so it *must* be random". Yes, it uses statistical tests to "prove" it is secure, but AES_ENCRYPT(NSA_KEY, COUNTER++) will also pass statistical tests with flying colors. So if RDRAND is available, mix it into entropy submitted from userspace. It can't hurt, and if you believe the NSA has backdoored RDRAND, then they probably have enough details about the Intel microarchitecture that they can reverse engineer how the Jitter entropy daemon affects the microarchitecture, and attack its output stream. And if RDRAND is in fact an honest DRNG, it will immeasurably improve on what the Jitter entropy daemon might produce. This also provides some protection against someone who is able to read or set the entropy seed file. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ddc493d976fd..ea4dbfa30657 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1897,14 +1897,22 @@ static int write_pool(struct entropy_store *r, const char __user *buffer, size_t count) { size_t bytes; - __u32 buf[16]; + __u32 t, buf[16]; const char __user *p = buffer; while (count > 0) { + int b, i = 0; + bytes = min(count, sizeof(buf)); if (copy_from_user(&buf, p, bytes)) return -EFAULT; + for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { + if (!arch_get_random_int(&t)) + break; + buf[i] ^= t; + } + count -= bytes; p += bytes; -- GitLab From 961f9feb4332f448e5d2fa4851ba86598fc112de Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Jul 2018 12:44:46 -0700 Subject: [PATCH 0576/1291] squashfs: be more careful about metadata corruption commit 01cfb7937a9af2abb1136c7e89fbf3fd92952956 upstream. Anatoly Trosinenko reports that a corrupted squashfs image can cause a kernel oops. It turns out that squashfs can end up being confused about negative fragment lengths. The regular squashfs_read_data() does check for negative lengths, but squashfs_read_metadata() did not, and the fragment size code just blindly trusted the on-disk value. Fix both the fragment parsing and the metadata reading code. Reported-by: Anatoly Trosinenko Cc: Al Viro Cc: Phillip Lougher Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/cache.c | 3 +++ fs/squashfs/file.c | 8 ++++++-- fs/squashfs/fragment.c | 4 +--- fs/squashfs/squashfs_fs.h | 6 ++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 23813c078cc9..0839efa720b3 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + if (unlikely(length < 0)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 13d80947bf9e..fcff2e0487fe 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n, } for (i = 0; i < blocks; i++) { - int size = le32_to_cpu(blist[i]); + int size = squashfs_block_size(blist[i]); + if (size < 0) { + err = size; + goto failure; + } block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); } n -= blocks; @@ -367,7 +371,7 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) sizeof(size)); if (res < 0) return res; - return le32_to_cpu(size); + return squashfs_block_size(size); } /* Copy data into page cache */ diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0ed6edbc5c71..86ad9a4b8c36 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -61,9 +61,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); - size = le32_to_cpu(fragment_entry.size); - - return size; + return squashfs_block_size(fragment_entry.size); } diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 24d12fd14177..4e6853f084d0 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -129,6 +129,12 @@ #define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) +static inline int squashfs_block_size(__le32 raw) +{ + u32 size = le32_to_cpu(raw); + return (size >> 25) ? -EIO : size; +} + /* * Inode number ops. Inodes consist of a compressed block number, and an * uncompressed offset within that block -- GitLab From cdcbe750acd6cd1e083189c2a50810eeb59f057a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 10 Jul 2018 01:07:43 -0400 Subject: [PATCH 0577/1291] ext4: fix inline data updates with checksums enabled commit 362eca70b53389bddf3143fe20f53dcce2cfdf61 upstream. The inline data code was updating the raw inode directly; this is problematic since if metadata checksums are enabled, ext4_mark_inode_dirty() must be called to update the inode's checksum. In addition, the jbd2 layer requires that get_write_access() be called before the metadata buffer is modified. Fix both of these problems. https://bugzilla.kernel.org/show_bug.cgi?id=200443 Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 19 +++++++++++-------- fs/ext4/inode.c | 16 +++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 7d498f4a3f90..b549cfd2d7d3 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -688,6 +688,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, goto convert; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out; + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, 0, flags); @@ -716,7 +720,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: - if (handle) + if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; @@ -758,6 +762,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); + mark_inode_dirty(inode); out: return copied; } @@ -904,7 +909,6 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, goto out; } - page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) { ret = -ENOMEM; @@ -922,6 +926,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, if (ret < 0) goto out_release_page; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *pagep = page; @@ -942,7 +949,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int i_size_changed = 0; int ret; ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -960,10 +966,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos+copied > inode->i_size) { + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); - i_size_changed = 1; - } unlock_page(page); put_page(page); @@ -973,8 +977,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) - mark_inode_dirty(inode); + mark_inode_dirty(inode); return copied; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2efe4d2ad87..f9baa59de0e2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1388,9 +1388,10 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1418,7 +1419,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) + if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && ext4_can_truncate(inode)) @@ -1492,6 +1493,7 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1499,7 +1501,7 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1530,7 +1532,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); - if (size_changed) { + if (size_changed || inline_data) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -2027,11 +2029,7 @@ static int __ext4_journalled_writepage(struct page *page, } if (inline_data) { - BUFFER_TRACE(inode_bh, "get write access"); - ret = ext4_journal_get_write_access(handle, inode_bh); - - err = ext4_handle_dirty_metadata(handle, inode, inode_bh); - + ret = ext4_mark_inode_dirty(handle, inode); } else { ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); -- GitLab From dc1b4b710fbef5f8f5a41efe35368e9dd07e71cb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 12 Jul 2018 19:08:05 -0400 Subject: [PATCH 0578/1291] ext4: check for allocation block validity with block group locked commit 8d5a803c6a6ce4ec258e31f76059ea5153ba46ef upstream. With commit 044e6e3d74a3: "ext4: don't update checksum of new initialized bitmaps" the buffer valid bit will get set without actually setting up the checksum for the allocation bitmap, since the checksum will get calculated once we actually allocate an inode or block. If we are doing this, then we need to (re-)check the verified bit after we take the block group lock. Otherwise, we could race with another process reading and verifying the bitmap, which would then complain about the checksum being invalid. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1780137 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 3 +++ fs/ext4/ialloc.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9c9eafd6bd76..70266a3355dc 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -379,6 +379,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, desc, bh))) { ext4_unlock_group(sb, block_group); @@ -401,6 +403,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 95341bc2b3b7..e38340b218a0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -91,6 +91,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8)) { @@ -108,6 +110,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSBADCRC; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } -- GitLab From f547aa20b4f61662ad3e1a2040bb3cc5778f19b0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 28 Jul 2018 08:12:04 -0400 Subject: [PATCH 0579/1291] ext4: fix check to prevent initializing reserved inodes commit 5012284700775a4e6e3fbe7eac4c543c4874b559 upstream. Commit 8844618d8aa7: "ext4: only look at the bg_flags field if it is valid" will complain if block group zero does not have the EXT4_BG_INODE_ZEROED flag set. Unfortunately, this is not correct, since a freshly created file system has this flag cleared. It gets almost immediately after the file system is mounted read-write --- but the following somewhat unlikely sequence will end up triggering a false positive report of a corrupted file system: mkfs.ext4 /dev/vdc mount -o ro /dev/vdc /vdc mount -o remount,rw /dev/vdc Instead, when initializing the inode table for block group zero, test to make sure that itable_unused count is not too large, since that is the case that will result in some or all of the reserved inodes getting cleared. This fixes the failures reported by Eric Whiteney when running generic/230 and generic/231 in the the nojournal test case. Fixes: 8844618d8aa7 ("ext4: only look at the bg_flags field if it is valid") Reported-by: Eric Whitney Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ialloc.c | 5 ++++- fs/ext4/super.c | 8 +------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e38340b218a0..2f46564d3fca 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1397,7 +1397,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ext4_itable_unused_count(sb, gdp)), sbi->s_inodes_per_block); - if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || + ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp)) < + EXT4_FIRST_INO(sb)))) { ext4_error(sb, "Something is wrong with group %u: " "used itable blocks: %d; " "itable unused count: %u", diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fc32a67a7a19..6b0c1ea95196 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3103,14 +3103,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) if (!gdp) continue; - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) - continue; - if (group != 0) + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) break; - ext4_error(sb, "Inode table for bg 0 marked as " - "needing zeroing"); - if (sb_rdonly(sb)) - return ngroups; } return group; -- GitLab From 011626d61a43224dcebefca44279f981d504e8d2 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 17 Jan 2018 16:48:39 +0100 Subject: [PATCH 0580/1291] PCI: pciehp: Assume NoCompl+ for Thunderbolt ports commit 493fb50e958c1c6deef7feff0b8c3855def78d75 upstream. Certain Thunderbolt 1 controllers claim to support Command Completed events (value of 0b in the No Command Completed Support field of the Slot Capabilities register) but in reality they neither set the Command Completed bit in the Slot Status register nor signal a Command Completed interrupt: 8086:1513 CV82524 [Light Ridge 4C 2010] 8086:151a DSL2310 [Eagle Ridge 2C 2011] 8086:151b CVL2510 [Light Peak 2C 2010] 8086:1547 DSL3510 [Cactus Ridge 4C 2012] 8086:1548 DSL3310 [Cactus Ridge 2C 2012] 8086:1549 DSL2210 [Port Ridge 1C 2011] All known newer chips (Redwood Ridge and onwards) set No Command Completed Support, indicating that they do not support Command Completed events. The user-visible impact is that after unplugging such a device, 2 seconds elapse until pciehp is unbound. That's because on ->remove, pcie_write_cmd() is called via pcie_disable_notification() and every call to pcie_write_cmd() takes 2 seconds (1 second for each invocation of pcie_wait_cmd()): [ 337.942727] pciehp 0000:0a:00.0:pcie204: Timeout on hotplug command 0x1038 (issued 21176 msec ago) [ 340.014735] pciehp 0000:0a:00.0:pcie204: Timeout on hotplug command 0x0000 (issued 2072 msec ago) That by itself has always been unpleasant, but the situation has become worse with commit cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown"): Now pciehp is unbound on ->shutdown. Because Thunderbolt controllers typically have 4 hotplug ports, every reboot and shutdown is now delayed by 8 seconds, plus another 2 seconds for every attached Thunderbolt 1 device. Thunderbolt hotplug slots are not physical slots that one inserts cards into, but rather logical hotplug slots implemented in silicon. Devices appear beyond those logical slots once a PCI tunnel is established on top of the Thunderbolt Converged I/O switch. One would expect commands written to the Slot Control register to be executed immediately by the silicon, so for simplicity we always assume NoCompl+ for Thunderbolt ports. Fixes: cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown") Tested-by: Mika Westerberg Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Cc: stable@vger.kernel.org # v4.12+ Cc: Sinan Kaya Cc: Yehezkel Bernat Cc: Michael Jamet Cc: Andreas Noever Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp_hpc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 05832b597e53..46c2ee2caf28 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -863,6 +863,13 @@ struct controller *pcie_init(struct pcie_device *dev) if (pdev->hotplug_user_indicators) slot_cap &= ~(PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_PIP); + /* + * We assume no Thunderbolt controllers support Command Complete events, + * but some controllers falsely claim they do. + */ + if (pdev->is_thunderbolt) + slot_cap |= PCI_EXP_SLTCAP_NCCS; + ctrl->slot_cap = slot_cap; mutex_init(&ctrl->ctrl_lock); init_waitqueue_head(&ctrl->queue); -- GitLab From 21b5b5e80bcf3f873bf89e9d957a129ea1fdcf36 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Nov 2017 15:49:31 +0000 Subject: [PATCH 0581/1291] PCI: xgene: Remove leftover pci_scan_child_bus() call commit 94b9d290b753cbbc87971ee134511245f5872a83 upstream. The changes in commit 9af275be15f7 ("PCI: xgene: Convert PCI scan API to pci_scan_root_bus_bridge()") converted the xgene PCI host driver to the new pci_scan_root_bus_bridge() bus scanning API but erroneously left the existing pci_scan_child_bus() call in place which resulted in duplicate PCI bus enumerations. Remove the leftover pci_scan_child_bus() call to properly complete the API conversion. Fixes: 9af275be15f7 ("PCI: xgene: Convert PCI scan API to pci_scan_root_bus_bridge()") Tested-by: Khuong Dinh Signed-off-by: Lorenzo Pieralisi Cc: # 4.13+ Cc: Bjorn Helgaas Cc: Tanmay Inamdar Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-xgene.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index 087645116ecb..c78fd9c2cf8c 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -686,7 +686,6 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) bus = bridge->bus; - pci_scan_child_bus(bus); pci_assign_unassigned_bus_resources(bus); list_for_each_entry(child, &bus->children, node) pcie_bus_configure_settings(child); -- GitLab From 6aaaca7b81e4929a43ac8c0a2a734b762467c60f Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 29 Nov 2017 10:01:32 +0800 Subject: [PATCH 0582/1291] ovl: Sync upper dirty data when syncing overlayfs commit e8d4bfe3a71537284a90561f77c85dea6c154369 upstream. When executing filesystem sync or umount on overlayfs, dirty data does not get synced as expected on upper filesystem. This patch fixes sync filesystem method to keep data consistency for overlayfs. Signed-off-by: Chengguang Xu Fixes: e593b2bf513d ("ovl: properly implement sync_filesystem()") Cc: #4.11 Signed-off-by: Miklos Szeredi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b8f8d666e8d4..ba20393d60ef 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -232,6 +232,7 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs); } +/* Sync real dirty inodes in upper filesystem (if it exists) */ static int ovl_sync_fs(struct super_block *sb, int wait) { struct ovl_fs *ufs = sb->s_fs_info; @@ -240,14 +241,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait) if (!ufs->upper_mnt) return 0; - upper_sb = ufs->upper_mnt->mnt_sb; - if (!upper_sb->s_op->sync_fs) + + /* + * If this is a sync(2) call or an emergency sync, all the super blocks + * will be iterated, including upper_sb, so no need to do anything. + * + * If this is a syncfs(2) call, then we do need to call + * sync_filesystem() on upper_sb, but enough if we do it when being + * called with wait == 1. + */ + if (!wait) return 0; - /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + upper_sb = ufs->upper_mnt->mnt_sb; + down_read(&upper_sb->s_umount); - ret = upper_sb->s_op->sync_fs(upper_sb, wait); + ret = sync_filesystem(upper_sb); up_read(&upper_sb->s_umount); + return ret; } -- GitLab From b8e9dd160ca358f1f2aadccfb64eb1a85d6a77da Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 10 Apr 2018 14:38:50 +0900 Subject: [PATCH 0583/1291] usb: gadget: udc: renesas_usb3: should remove debugfs commit 1990cf7c21ea185cec98c6d45a82c04481261e35 upstream. This patch fixes an issue that this driver doesn't remove its debugfs. Fixes: 43ba968b00ea ("usb: gadget: udc: renesas_usb3: add debugfs to set the b-device mode") Cc: # v4.14+ Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Felipe Balbi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 4cfa72cb0a91..c12a1a6554ba 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -334,6 +334,7 @@ struct renesas_usb3 { struct usb_gadget_driver *driver; struct extcon_dev *extcon; struct work_struct extcon_work; + struct dentry *dentry; struct renesas_usb3_ep *usb3_ep; int num_usb3_eps; @@ -2397,8 +2398,12 @@ static void renesas_usb3_debugfs_init(struct renesas_usb3 *usb3, file = debugfs_create_file("b_device", 0644, root, usb3, &renesas_usb3_b_device_fops); - if (!file) + if (!file) { dev_info(dev, "%s: Can't create debugfs mode\n", __func__); + debugfs_remove_recursive(root); + } else { + usb3->dentry = root; + } } /*------- platform_driver ------------------------------------------------*/ @@ -2406,6 +2411,7 @@ static int renesas_usb3_remove(struct platform_device *pdev) { struct renesas_usb3 *usb3 = platform_get_drvdata(pdev); + debugfs_remove_recursive(usb3->dentry); device_remove_file(&pdev->dev, &dev_attr_role); usb_del_gadget_udc(&usb3->gadget); -- GitLab From a61b3378b8473694ebd87e59c8a5dd35839c50fc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:42 +0300 Subject: [PATCH 0584/1291] RDMA/uverbs: Protect from attempts to create flows on unsupported QP commit 940efcc8889f0d15567eb07fc9fd69b06e366aa5 upstream. Flows can be created on UD and RAW_PACKET QP types. Attempts to provide other QP types as an input causes to various unpredictable failures. The reason is that in order to support all various types (e.g. XRC), we are supposed to use real_qp handle and not qp handle and expect to driver/FW to fail such (XRC) flows. The simpler and safer variant is to ban all QP types except UD and RAW_PACKET, instead of relying on driver/FW. Cc: # 3.11 Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") Cc: syzkaller Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 186dce6bba8f..b8229d7b0ff5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3376,6 +3376,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } + if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) { + err = -EINVAL; + goto err_put; + } + flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * sizeof(union ib_flow_spec), GFP_KERNEL); if (!flow_attr) { -- GitLab From 62310e69f1ae5923646adedfd916fa2b090bada7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 23 May 2018 08:20:21 +0200 Subject: [PATCH 0585/1291] net: dsa: qca8k: Force CPU port to its highest bandwidth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 79a4ed4f0f93fc65e48a0fc5247ffa5645f7b0cc upstream. By default autonegotiation is enabled to configure MAC on all ports. For the CPU port autonegotiation can not be used so we need to set some sensible defaults manually. This patch forces the default setting of the CPU port to 1000Mbps/full duplex which is the chip maximum capability. Also correct size of the bit field used to configure link speed. Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 6 +++++- drivers/net/dsa/qca8k.h | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 2a2bbf5e9c8e..d2bcf2b3c19c 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -490,6 +490,7 @@ qca8k_setup(struct dsa_switch *ds) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; int ret, i, phy_mode = -1; + u32 mask; /* Make sure that port 0 is the cpu port */ if (!dsa_is_cpu_port(ds, 0)) { @@ -515,7 +516,10 @@ qca8k_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* Enable CPU Port */ + /* Enable CPU Port, force it to maximum bandwidth and full-duplex */ + mask = QCA8K_PORT_STATUS_SPEED_1000 | QCA8K_PORT_STATUS_TXFLOW | + QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_DUPLEX; + qca8k_write(priv, QCA8K_REG_PORT_STATUS(QCA8K_CPU_PORT), mask); qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 1cf8a920d4ff..5bda16555478 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -51,8 +51,10 @@ #define QCA8K_GOL_MAC_ADDR0 0x60 #define QCA8K_GOL_MAC_ADDR1 0x64 #define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) -#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0) -#define QCA8K_PORT_STATUS_SPEED_S 0 +#define QCA8K_PORT_STATUS_SPEED GENMASK(1, 0) +#define QCA8K_PORT_STATUS_SPEED_10 0 +#define QCA8K_PORT_STATUS_SPEED_100 0x1 +#define QCA8K_PORT_STATUS_SPEED_1000 0x2 #define QCA8K_PORT_STATUS_TXMAC BIT(2) #define QCA8K_PORT_STATUS_RXMAC BIT(3) #define QCA8K_PORT_STATUS_TXFLOW BIT(4) -- GitLab From 1fbc97b32b7e3756e2b84f652108281791e9d377 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 23 May 2018 08:20:20 +0200 Subject: [PATCH 0586/1291] net: dsa: qca8k: Enable RXMAC when bringing up a port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eee1fe64765c562d8bcaf95e5631a8ea2f760f34 upstream. When a port is brought up/down do not enable/disable only the TXMAC but the RXMAC as well. This is essential for the CPU port to work. Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index d2bcf2b3c19c..533aa5325819 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -473,7 +473,7 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) static void qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) { - u32 mask = QCA8K_PORT_STATUS_TXMAC; + u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC; /* Port 0 and 6 have no internal PHY */ if ((port > 0) && (port < 6)) -- GitLab From 2eda475b0849ba898abf99a61e8926b1335f6988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 23 May 2018 08:20:18 +0200 Subject: [PATCH 0587/1291] net: dsa: qca8k: Add QCA8334 binding documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 218bbea11a777c156eb7bcbdc72867b32ae10985 upstream. Add support for the four-port variant of the Qualcomm QCA833x switch. The CPU port default link settings can be reconfigured using a fixed-link sub-node. Signed-off-by: Michal Vokáč Reviewed-by: Rob Herring Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/net/dsa/qca8k.txt | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 9c67ee4890d7..bbcb255c3150 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -2,7 +2,10 @@ Required properties: -- compatible: should be "qca,qca8337" +- compatible: should be one of: + "qca,qca8334" + "qca,qca8337" + - #size-cells: must be 0 - #address-cells: must be 1 @@ -14,6 +17,20 @@ port and PHY id, each subnode describing a port needs to have a valid phandle referencing the internal PHY connected to it. The CPU port of this switch is always port 0. +A CPU port node has the following optional node: + +- fixed-link : Fixed-link subnode describing a link to a non-MDIO + managed entity. See + Documentation/devicetree/bindings/net/fixed-link.txt + for details. + +For QCA8K the 'fixed-link' sub-node supports only the following properties: + +- 'speed' (integer, mandatory), to indicate the link speed. Accepted + values are 10, 100 and 1000 +- 'full-duplex' (boolean, optional), to indicate that full duplex is + used. When absent, half duplex is assumed. + Example: @@ -53,6 +70,10 @@ Example: label = "cpu"; ethernet = <&gmac1>; phy-mode = "rgmii"; + fixed-link { + speed = 1000; + full-duplex; + }; }; port@1 { -- GitLab From 20556dc7f239f8ad724f8ff5fec289961439cd6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 23 May 2018 08:20:22 +0200 Subject: [PATCH 0588/1291] net: dsa: qca8k: Allow overwriting CPU port setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9bb2289f90e671bdb78e306974187424ac19ff8e upstream. Implement adjust_link function that allows to overwrite default CPU port setting using fixed-link device tree subnode. Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 43 +++++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 1 + 2 files changed, 44 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 533aa5325819..9645c8f05c7f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -588,6 +588,47 @@ qca8k_setup(struct dsa_switch *ds) return 0; } +static void +qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy) +{ + struct qca8k_priv *priv = ds->priv; + u32 reg; + + /* Force fixed-link setting for CPU port, skip others. */ + if (!phy_is_pseudo_fixed_link(phy)) + return; + + /* Set port speed */ + switch (phy->speed) { + case 10: + reg = QCA8K_PORT_STATUS_SPEED_10; + break; + case 100: + reg = QCA8K_PORT_STATUS_SPEED_100; + break; + case 1000: + reg = QCA8K_PORT_STATUS_SPEED_1000; + break; + default: + dev_dbg(priv->dev, "port%d link speed %dMbps not supported.\n", + port, phy->speed); + return; + } + + /* Set duplex mode */ + if (phy->duplex == DUPLEX_FULL) + reg |= QCA8K_PORT_STATUS_DUPLEX; + + /* Force flow control */ + if (dsa_is_cpu_port(ds, port)) + reg |= QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_TXFLOW; + + /* Force link down before changing MAC options */ + qca8k_port_set_status(priv, port, 0); + qca8k_write(priv, QCA8K_REG_PORT_STATUS(port), reg); + qca8k_port_set_status(priv, port, 1); +} + static int qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) { @@ -836,6 +877,7 @@ qca8k_get_tag_protocol(struct dsa_switch *ds) static const struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, + .adjust_link = qca8k_adjust_link, .get_strings = qca8k_get_strings, .phy_read = qca8k_phy_read, .phy_write = qca8k_phy_write, @@ -867,6 +909,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->bus = mdiodev->bus; + priv->dev = &mdiodev->dev; /* read the switches ID register */ id = qca8k_read(priv, QCA8K_REG_MASK_CTRL); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 5bda16555478..613fe5c50236 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -167,6 +167,7 @@ struct qca8k_priv { struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; struct dsa_switch *ds; struct mutex reg_mutex; + struct device *dev; }; struct qca8k_mib_desc { -- GitLab From 1494a3a70ce0feb8b9fe47f3cb30e804d2d4c2d5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Jul 2018 18:15:46 +0200 Subject: [PATCH 0589/1291] ipv4: remove BUG_ON() from fib_compute_spec_dst [ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ] Remove BUG_ON() from fib_compute_spec_dst routine and check in_dev pointer during flowi4 data structure initialization. fib_compute_spec_dst routine can be run concurrently with device removal where ip_ptr net_device pointer is set to NULL. This can happen if userspace enables pkt info on UDP rx socket and the device is removed while traffic is flowing Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 67eebcb113f3..5bbdd05d0cd3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -282,19 +282,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return ip_hdr(skb)->daddr; in_dev = __in_dev_get_rcu(dev); - BUG_ON(!in_dev); net = dev_net(dev); scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, - .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); -- GitLab From 11b694387ab493fbc10e203109ed2faed5d592b1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 26 Jul 2018 23:40:33 +0300 Subject: [PATCH 0590/1291] net: ena: Fix use of uninitialized DMA address bits field [ Upstream commit 101f0cd4f2216d32f1b8a75a2154cf3997484ee2 ] UBSAN triggers the following undefined behaviour warnings: [...] [ 13.236124] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:468:22 [ 13.240043] shift exponent 64 is too large for 64-bit type 'long long unsigned int' [...] [ 13.744769] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:373:4 [ 13.748694] shift exponent 64 is too large for 64-bit type 'long long unsigned int' [...] When splitting the address to high and low, GENMASK_ULL is used to generate a bitmask with dma_addr_bits field from io_sq (in ena_com_prepare_tx and ena_com_add_single_rx_desc). The problem is that dma_addr_bits is not initialized with a proper value (besides being cleared in ena_com_create_io_queue). Assign dma_addr_bits the correct value that is stored in ena_dev when initializing the SQ. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Gal Pressman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 52beba8c7a39..e3b7a71fcad9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -331,6 +331,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); + io_sq->dma_addr_bits = ena_dev->dma_addr_bits; io_sq->desc_entry_size = (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? sizeof(struct ena_eth_io_tx_desc) : -- GitLab From 112e80ac82851912fd8dae1e4c0bf1a0fb3bffeb Mon Sep 17 00:00:00 2001 From: tangpengpeng Date: Thu, 26 Jul 2018 14:45:16 +0800 Subject: [PATCH 0591/1291] net: fix amd-xgbe flow-control issue [ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ] If we enable or disable xgbe flow-control by ethtool , it does't work.Because the parameter is not properly assigned,so we need to adjust the assignment order of the parameters. Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic") Signed-off-by: tangpengpeng Acked-by: Tom Lendacky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 1b45cd73a258..119777986ea4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1128,14 +1128,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) if (pdata->tx_pause != pdata->phy.tx_pause) { new_state = 1; - pdata->hw_if.config_tx_flow_control(pdata); pdata->tx_pause = pdata->phy.tx_pause; + pdata->hw_if.config_tx_flow_control(pdata); } if (pdata->rx_pause != pdata->phy.rx_pause) { new_state = 1; - pdata->hw_if.config_rx_flow_control(pdata); pdata->rx_pause = pdata->phy.rx_pause; + pdata->hw_if.config_rx_flow_control(pdata); } /* Speed support */ -- GitLab From ccdbe7e2374a0be4ab81d70b53d23dd12b043648 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 28 Jul 2018 09:52:10 +0200 Subject: [PATCH 0592/1291] net: lan78xx: fix rx handling before first packet is send [ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ] As long the bh tasklet isn't scheduled once, no packet from the rx path will be handled. Since the tx path also schedule the same tasklet this situation only persits until the first packet transmission. So fix this issue by scheduling the tasklet after link reset. Link: https://github.com/raspberrypi/linux/issues/2617 Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet") Suggested-by: Floris Bos Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0aa91ab9a0fb..9e3f632e22f1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1216,6 +1216,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) mod_timer(&dev->stat_monitor, jiffies + STAT_UPDATE_TIMER); } + + tasklet_schedule(&dev->bh); } return ret; -- GitLab From e071e2fdf45e1505436ad61e42b0d44904db1997 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:57:47 +0300 Subject: [PATCH 0593/1291] net: mdio-mux: bcm-iproc: fix wrong getter and setter pair [ Upstream commit b0753408aadf32c7ece9e6b765017881e54af833 ] mdio_mux_iproc_probe() uses platform_set_drvdata() to store md pointer in device, whereas mdio_mux_iproc_remove() restores md pointer by dev_get_platdata(&pdev->dev). This leads to wrong resources release. The patch replaces getter to platform_get_drvdata. Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs") Signed-off-by: Anton Vasilyev Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-mux-bcm-iproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 0831b7142df7..0c5b68e7da51 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -218,7 +218,7 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev) static int mdio_mux_iproc_remove(struct platform_device *pdev) { - struct iproc_mdiomux_desc *md = dev_get_platdata(&pdev->dev); + struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); mdio_mux_uninit(md->mux_handle); mdiobus_unregister(md->mii_bus); -- GitLab From 7f36a065963aebecc94d2be1e37f30162e5fe229 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 15:05:37 +0300 Subject: [PATCH 0594/1291] NET: stmmac: align DMA stuff to largest cache line length [ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ] As for today STMMAC_ALIGN macro (which is used to align DMA stuff) relies on L1 line length (L1_CACHE_BYTES). This isn't correct in case of system with several cache levels which might have L1 cache line length smaller than L2 line. This can lead to sharing one cache line between DMA buffer and other data, so we can lose this data while invalidate DMA buffer before DMA transaction. Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for aligning. Signed-off-by: Eugeniy Paltsev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 27f2e650e27b..1a9a382bf1c4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -51,7 +51,7 @@ #include #include "dwmac1000.h" -#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) +#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) /* Module parameters */ -- GitLab From 7309441887f64f6ed942408a569bd629386cec71 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Jul 2018 17:19:12 -0400 Subject: [PATCH 0595/1291] tcp_bbr: fix bw probing to raise in-flight data for very small BDPs [ Upstream commit 383d470936c05554219094a4d364d964cb324827 ] For some very small BDPs (with just a few packets) there was a quantization effect where the target number of packets in flight during the super-unity-gain (1.25x) phase of gain cycling was implicitly truncated to a number of packets no larger than the normal unity-gain (1.0x) phase of gain cycling. This meant that in multi-flow scenarios some flows could get stuck with a lower bandwidth, because they did not push enough packets inflight to discover that there was more bandwidth available. This was really only an issue in multi-flow LAN scenarios, where RTTs and BDPs are low enough for this to be an issue. This fix ensures that gain cycling can raise inflight for small BDPs by ensuring that in PROBE_BW mode target inflight values with a super-unity gain are always greater than inflight values with a gain <= 1. Importantly, this applies whether the inflight value is calculated for use as a cwnd value, or as a target inflight value for the end of the super-unity phase in bbr_is_next_cycle_phase() (both need to be bigger to ensure we can probe with more packets in flight reliably). This is a candidate fix for stable releases. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 9a0b952dd09b..06f247ca9197 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -353,6 +353,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; + /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ + if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT) + cwnd += 2; + return cwnd; } -- GitLab From da970765b24cad557bde1b0372166e46c1276230 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Fri, 27 Jul 2018 17:56:08 +0800 Subject: [PATCH 0596/1291] xen-netfront: wait xenbus state change when load module manually [ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ] When loading module manually, after call xenbus_switch_state to initializes the state of the netfront device, the driver state did not change so fast that may lead no dev created in latest kernel. This patch adds wait to make sure xenbus knows the driver is not in closed/unknown state. Current state: [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Cannot get device settings: No such device Cannot get wake-on-lan settings: No such device Cannot get message level: No such device Cannot get link status: No such device No data available With the patch installed. [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Link detected: yes Signed-off-by: Xiao Liang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 04e0765be5e7..dfc076f9ee4b 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,6 +87,7 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_load_q); static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); struct netfront_stats { @@ -1330,6 +1331,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); xenbus_switch_state(dev, XenbusStateInitialising); + wait_event(module_load_q, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown); return netdev; exit: -- GitLab From e208cda5f10e131f149e59d7de4ceb943382f4c6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 27 Jul 2018 16:54:44 +0100 Subject: [PATCH 0597/1291] netlink: Do not subscribe to non-existent groups [ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ] Make ABI more strict about subscribing to group > ngroups. Code doesn't check for that and it looks bogus. (one can subscribe to non-existing group) Still, it's possible to bind() to all possible groups with (-1) Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b3932846f6c4..143d9001e87d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -976,6 +976,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- GitLab From fffd3058eaf438e71cd301c14a0c67161a5864df Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 30 Jul 2018 18:32:36 +0100 Subject: [PATCH 0598/1291] netlink: Don't shift with UB on nlk->ngroups [ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ] On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in hang during boot. Check for 0 ngroups and use (unsigned long long) as a type to shift. Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups"). Reported-by: kernel test robot Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 143d9001e87d..b2fcbf012056 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -976,7 +976,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } - groups &= (1UL << nlk->ngroups) - 1; + + if (nlk->ngroups == 0) + groups = 0; + else + groups &= (1ULL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- GitLab From 5a1baf194475849ff31a34c30d540bc1795a2a7c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 May 2018 14:47:25 -0700 Subject: [PATCH 0599/1291] tcp: do not force quickack when receiving out-of-order packets [ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ] As explained in commit 9f9843a751d0 ("tcp: properly handle stretch acks in slow start"), TCP stacks have to consider how many packets are acknowledged in one single ACK, because of GRO, but also because of ACK compression or losses. We plan to add SACK compression in the following patch, we must therefore not call tcp_enter_quickack_mode() Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b86e7b8beb1d..e12bf35beb1c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4721,8 +4721,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(sk); - if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", -- GitLab From 1c005489fa9876713d8f1f626947017dd45a9bfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 May 2018 15:08:56 -0700 Subject: [PATCH 0600/1291] tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode [ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ] We want to add finer control of the number of ACK packets sent after ECN events. This patch is not changing current behavior, it only enables following change. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/ipv4/tcp_dctcp.c | 4 ++-- net/ipv4/tcp_input.c | 24 +++++++++++++----------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3173dd12b8cc..686e33ea76e7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -372,7 +372,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -void tcp_enter_quickack_mode(struct sock *sk); +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index c78fb53988a1..1a9b88c8cf72 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; @@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e12bf35beb1c..bc9c48d01980 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -198,21 +198,23 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_incr_quickack(struct sock *sk) +static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks == 0) quickacks = 2; + quickacks = min(quickacks, max_quickacks); if (quickacks > icsk->icsk_ack.quick) - icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); + icsk->icsk_ack.quick = quickacks; } -void tcp_enter_quickack_mode(struct sock *sk) +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); - tcp_incr_quickack(sk); + + tcp_incr_quickack(sk, max_quickacks); icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } @@ -257,7 +259,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp); + tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); break; case INET_ECN_CE: if (tcp_ca_needs_ecn((struct sock *)tp)) @@ -265,7 +267,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp); + tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; @@ -686,7 +688,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) /* The _first_ data packet received, initialize * delayed ACK engine. */ - tcp_incr_quickack(sk); + tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); icsk->icsk_ack.ato = TCP_ATO_MIN; } else { int m = now - icsk->icsk_ack.lrcvtime; @@ -702,7 +704,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) /* Too long gap. Apparently sender failed to * restart window, so that we send ACKs quickly. */ - tcp_incr_quickack(sk); + tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); sk_mem_reclaim(sk); } } @@ -4160,7 +4162,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -4710,7 +4712,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_schedule_ack(sk); drop: tcp_drop(sk, skb); @@ -5791,7 +5793,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); -- GitLab From fd31083cbe050d10b1cb4f681929154cffe18fb6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 May 2018 15:08:57 -0700 Subject: [PATCH 0601/1291] tcp: do not aggressively quick ack after ECN events [ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ] ECN signals currently forces TCP to enter quickack mode for up to 16 (TCP_MAX_QUICKACKS) following incoming packets. We believe this is not needed, and only sending one immediate ack for the current packet should be enough. This should reduce the extra load noticed in DCTCP environments, after congestion events. This is part 2 of our effort to reduce pure ACK packets. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc9c48d01980..a7bb96054852 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -259,7 +259,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); + tcp_enter_quickack_mode((struct sock *)tp, 1); break; case INET_ECN_CE: if (tcp_ca_needs_ecn((struct sock *)tp)) @@ -267,7 +267,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); + tcp_enter_quickack_mode((struct sock *)tp, 1); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; -- GitLab From 1f04d750f6688c0fe0cc13fd05d5744d1e7b50c3 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Mon, 4 Jun 2018 15:29:51 -0700 Subject: [PATCH 0602/1291] tcp: refactor tcp_ecn_check_ce to remove sk type cast [ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ] Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock* instead of tcp_sock* to clean up type casts. This is a pure refactor patch. Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7bb96054852..124be1636efe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -250,8 +250,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } -static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) +static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, @@ -259,31 +261,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp, 1); + tcp_enter_quickack_mode(sk, 1); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn((struct sock *)tp)) - tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE); + if (tcp_ca_needs_ecn(sk)) + tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp, 1); + tcp_enter_quickack_mode(sk, 1); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn((struct sock *)tp)) - tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE); + if (tcp_ca_needs_ecn(sk)) + tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; } } -static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) +static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) { - if (tp->ecn_flags & TCP_ECN_OK) - __tcp_ecn_check_ce(tp, skb); + if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) + __tcp_ecn_check_ce(sk, skb); } static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) @@ -710,7 +712,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } icsk->icsk_ack.lrcvtime = now; - tcp_ecn_check_ce(tp, skb); + tcp_ecn_check_ce(sk, skb); if (skb->len >= 128) tcp_grow_window(sk, skb); @@ -4443,7 +4445,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) u32 seq, end_seq; bool fragstolen; - tcp_ecn_check_ce(tp, skb); + tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); -- GitLab From 0a2f6725115dc17b97294403a5afca830c64e48a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jun 2018 08:47:21 -0700 Subject: [PATCH 0603/1291] tcp: add one more quick ack after after ECN events [ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ] Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/ tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink about our recent patch removing ~16 quick acks after ECN events. tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent, but in the case the sender cwnd was lowered to 1, we do not want to have a delayed ack for the next packet we will receive. Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events") Signed-off-by: Eric Dumazet Reported-by: Neal Cardwell Cc: Lawrence Brakmo Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 124be1636efe..bdabd748f4bc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -261,7 +261,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: if (tcp_ca_needs_ecn(sk)) @@ -269,7 +269,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; -- GitLab From deaacd62c2175c624a41c1b3c721e23c660031e1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Aug 2018 07:50:45 +0200 Subject: [PATCH 0604/1291] Linux 4.14.60 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 81b0e99dce80..5b48ec630990 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 59 +SUBLEVEL = 60 EXTRAVERSION = NAME = Petit Gorille -- GitLab From dc8edd08cd894a87b0136c7f077b48290eef14f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2018 06:30:54 -0700 Subject: [PATCH 0605/1291] bonding: avoid lockdep confusion in bond_get_stats() [ Upstream commit 7e2556e40026a1b0c16f37446ab398d5a5a892e4 ] syzbot found that the following sequence produces a LOCKDEP splat [1] ip link add bond10 type bond ip link add bond11 type bond ip link set bond11 master bond10 To fix this, we can use the already provided nest_level. This patch also provides correct nesting for dev->addr_list_lock [1] WARNING: possible recursive locking detected 4.18.0-rc6+ #167 Not tainted -------------------------------------------- syz-executor751/4439 is trying to acquire lock: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 but task is already holding lock: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&bond->stats_lock)->rlock); lock(&(&bond->stats_lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by syz-executor751/4439: #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: spin_lock include/linux/spinlock.h:310 [inline] #1: (____ptrval____) (&(&bond->stats_lock)->rlock){+.+.}, at: bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 #2: (____ptrval____) (rcu_read_lock){....}, at: bond_get_stats+0x0/0x560 include/linux/compiler.h:215 stack backtrace: CPU: 0 PID: 4439 Comm: syz-executor751 Not tainted 4.18.0-rc6+ #167 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] check_deadlock kernel/locking/lockdep.c:1809 [inline] validate_chain kernel/locking/lockdep.c:2405 [inline] __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:310 [inline] bond_get_stats+0xb4/0x560 drivers/net/bonding/bond_main.c:3426 dev_get_stats+0x10f/0x470 net/core/dev.c:8316 bond_get_stats+0x232/0x560 drivers/net/bonding/bond_main.c:3432 dev_get_stats+0x10f/0x470 net/core/dev.c:8316 rtnl_fill_stats+0x4d/0xac0 net/core/rtnetlink.c:1169 rtnl_fill_ifinfo+0x1aa6/0x3fb0 net/core/rtnetlink.c:1611 rtmsg_ifinfo_build_skb+0xc8/0x190 net/core/rtnetlink.c:3268 rtmsg_ifinfo_event.part.30+0x45/0xe0 net/core/rtnetlink.c:3300 rtmsg_ifinfo_event net/core/rtnetlink.c:3297 [inline] rtnetlink_event+0x144/0x170 net/core/rtnetlink.c:4716 notifier_call_chain+0x180/0x390 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 call_netdevice_notifiers net/core/dev.c:1753 [inline] netdev_features_change net/core/dev.c:1321 [inline] netdev_change_features+0xb3/0x110 net/core/dev.c:7759 bond_compute_features.isra.47+0x585/0xa50 drivers/net/bonding/bond_main.c:1120 bond_enslave+0x1b25/0x5da0 drivers/net/bonding/bond_main.c:1755 bond_do_ioctl+0x7cb/0xae0 drivers/net/bonding/bond_main.c:3528 dev_ifsioc+0x43c/0xb30 net/core/dev_ioctl.c:327 dev_ioctl+0x1b5/0xcc0 net/core/dev_ioctl.c:493 sock_do_ioctl+0x1d3/0x3e0 net/socket.c:992 sock_ioctl+0x30d/0x680 net/socket.c:1093 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:500 [inline] do_vfs_ioctl+0x1de/0x1720 fs/ioctl.c:684 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701 __do_sys_ioctl fs/ioctl.c:708 [inline] __se_sys_ioctl fs/ioctl.c:706 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440859 Code: e8 2c af 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc51a92878 EFLAGS: 00000213 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440859 RDX: 0000000020000040 RSI: 0000000000008990 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000022d5880 R11: 0000000000000213 R12: 0000000000007390 R13: 0000000000401db0 R14: 0000000000000000 R15: 0000000000000000 Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 00245b73c224..15aedb64a02b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1687,6 +1687,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_upper_unlink; } + bond->nest_level = dev_get_nest_level(bond_dev) + 1; + /* If the mode uses primary, then the following is handled by * bond_change_active_slave(). */ @@ -1734,7 +1736,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_mode_uses_xmit_hash(bond)) bond_update_slave_arr(bond, NULL); - bond->nest_level = dev_get_nest_level(bond_dev); netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", slave_dev->name, @@ -3379,6 +3380,13 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +static int bond_get_nest_level(struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + + return bond->nest_level; +} + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3387,7 +3395,7 @@ static void bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; - spin_lock(&bond->stats_lock); + spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev)); memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); @@ -4182,6 +4190,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, + .ndo_get_lock_subclass = bond_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = bond_netpoll_setup, .ndo_netpoll_cleanup = bond_netpoll_cleanup, @@ -4680,6 +4689,7 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; + bond->nest_level = SINGLE_DEPTH_NESTING; netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); -- GitLab From fc63057d5fdad14ccf94dffe746187388aad2e55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2018 20:09:11 -0700 Subject: [PATCH 0606/1291] inet: frag: enforce memory limits earlier [ Upstream commit 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 ] We currently check current frags memory usage only when a new frag queue is created. This allows attackers to first consume the memory budget (default : 4 MB) creating thousands of frag queues, then sending tiny skbs to exceed high_thresh limit by 2 to 3 order of magnitude. Note that before commit 648700f76b03 ("inet: frags: use rhashtables for reassembly units"), work queue could be starved under DOS, getting no cpu cycles. After commit 648700f76b03, only the per frag queue timer can eventually remove an incomplete frag queue and its skbs. Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Signed-off-by: Eric Dumazet Reported-by: Jann Horn Cc: Florian Westphal Cc: Peter Oskolkov Cc: Paolo Abeni Acked-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index e691705f0a85..ba4454ecdf0f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -356,11 +356,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { - inet_frag_schedule_worker(f); - return NULL; - } - q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -397,6 +392,11 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { + inet_frag_schedule_worker(f); + return NULL; + } + if (frag_mem_limit(nf) > nf->low_thresh) inet_frag_schedule_worker(f); -- GitLab From 8721f360894576908a32f5c2979dbe640731a97b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2018 21:50:29 -0700 Subject: [PATCH 0607/1291] ipv4: frags: handle possible skb truesize change [ Upstream commit 4672694bd4f1aebdab0ad763ae4716e89cb15221 ] ip_frag_queue() might call pskb_pull() on one skb that is already in the fragment queue. We need to take care of possible truesize change, or we might have an imbalance of the netns frags memory usage. IPv6 is immune to this bug, because RFC5722, Section 4, amended by Errata ID 3089 states : When reassembling an IPv6 datagram, if one or more its constituent fragments is determined to be an overlapping fragment, the entire datagram (and any constituent fragments) MUST be silently discarded. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index df8fe0503de0..4cb1befc3949 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -447,11 +447,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ if (i < next->len) { + int delta = -next->truesize; + /* Eat head of the next overlapped fragment * and leave the loop. The next ones cannot overlap. */ if (!pskb_pull(next, i)) goto err; + delta += next->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); FRAG_CB(next)->offset += i; qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) -- GitLab From bcbdea1371599dab43a16dcb3649f811cf602d03 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 31 Jul 2018 17:12:52 -0700 Subject: [PATCH 0608/1291] net: dsa: Do not suspend/resume closed slave_dev [ Upstream commit a94c689e6c9e72e722f28339e12dff191ee5a265 ] If a DSA slave network device was previously disabled, there is no need to suspend or resume it. Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 865e29e62bad..242e74b9d454 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1219,6 +1219,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_detach(slave_dev); if (p->phy) { @@ -1236,6 +1239,9 @@ int dsa_slave_resume(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_attach(slave_dev); if (p->phy) { -- GitLab From 1828cb3d10d9fee017fdd13dc655f708a1f39d68 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 21:13:16 +0000 Subject: [PATCH 0609/1291] netlink: Fix spectre v1 gadget in netlink_create() [ Upstream commit bc5b6c0b62b932626a135f516a41838c510c6eba ] 'protocol' is a user-controlled value, so sanitize it after the bounds check to avoid using it for speculative out-of-bounds access to arrays indexed by it. This addresses the following accesses detected with the help of smatch: * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_keys' [w] * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_key_strings' [w] * net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre issue 'nl_table' [w] (local cap) Cc: Josh Poimboeuf Signed-off-by: Jeremy Cline Reviewed-by: Josh Poimboeuf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b2fcbf012056..68c9d1833b95 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -647,6 +648,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; + protocol = array_index_nospec(protocol, MAX_LINKS); netlink_lock_table(); #ifdef CONFIG_MODULES -- GitLab From 83a46456c983258eb0ba6cfc1ad650df9f59c9c1 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 31 Jul 2018 15:08:20 +0100 Subject: [PATCH 0610/1291] net: stmmac: Fix WoL for PCI-based setups [ Upstream commit b7d0f08e9129c45ed41bc0cfa8e77067881e45fd ] WoL won't work in PCI-based setups because we are not saving the PCI EP state before entering suspend state and not allowing D3 wake. Fix this by using a wrapper around stmmac_{suspend/resume} which correctly sets the PCI EP state. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/stmmac_pci.c | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 8d375e51a526..6a393b16a1fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -257,7 +257,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pcim_enable_device(pdev); + ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -300,9 +300,45 @@ static int stmmac_pci_probe(struct pci_dev *pdev, static void stmmac_pci_remove(struct pci_dev *pdev) { stmmac_dvr_remove(&pdev->dev); + pci_disable_device(pdev); } -static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_suspend, stmmac_resume); +static int stmmac_pci_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int stmmac_pci_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume); /* synthetic ID, no official vendor */ #define PCI_VENDOR_ID_STMMAC 0x700 -- GitLab From e0638b6a054cef5ad820f7380ff59463c6519b57 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 1 Aug 2018 13:27:23 +0100 Subject: [PATCH 0611/1291] rxrpc: Fix user call ID check in rxrpc_service_prealloc_one [ Upstream commit c01f6c9b3207e52fc9973a066a856ddf7a0538d8 ] There just check the user call ID isn't already in use, hence should compare user_call_ID with xcall->user_call_ID, which is current node's user_call_ID. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Suggested-by: David Howells Signed-off-by: YueHaibing Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_accept.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3028298ca561..62b1581d44a5 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -115,9 +115,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < call->user_call_ID) + if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) + else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; -- GitLab From a766ccbf1d29db4f3a793131c2e986287757107c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 16 Jul 2018 11:49:27 +0300 Subject: [PATCH 0612/1291] net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager [ Upstream commit 5f5991f36dce1e69dd8bd7495763eec2e28f08e7 ] Execute mlx5_eswitch_init() only if we have MLX5_ESWITCH_MANAGER capabilities. Do the same for mlx5_eswitch_cleanup(). Fixes: a9f7705ffd66 ("net/mlx5: Unify vport manager capability check") Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 667415301066..f697084937c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1616,7 +1616,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int vport_num; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; esw_info(dev, @@ -1689,7 +1689,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); -- GitLab From 953f918d548b569d37467fadb7266d0cd93a9ff6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Jul 2018 14:27:15 -0700 Subject: [PATCH 0613/1291] squashfs: more metadata hardening commit d512584780d3e6a7cacb2f482834849453d444a1 upstream. Anatoly reports another squashfs fuzzing issue, where the decompression parameters themselves are in a compressed block. This causes squashfs_read_data() to be called in order to read the decompression options before the decompression stream having been set up, making squashfs go sideways. Reported-by: Anatoly Trosinenko Acked-by: Phillip Lougher Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2751476e6b6e..f098b9f1c396 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) -- GitLab From e7de67165efe052f1a05f4afae5b7cd4dc242b34 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Aug 2018 08:43:35 -0700 Subject: [PATCH 0614/1291] squashfs: more metadata hardenings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 71755ee5350b63fb1f283de8561cdb61b47f4d1d upstream. The squashfs fragment reading code doesn't actually verify that the fragment is inside the fragment table. The end result _is_ verified to be inside the image when actually reading the fragment data, but before that is done, we may end up taking a page fault because the fragment table itself might not even exist. Another report from Anatoly and his endless squashfs image fuzzing. Reported-by: Анатолий Тросиненко Acked-by:: Phillip Lougher , Cc: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/fragment.c | 13 +++++++++---- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/super.c | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 86ad9a4b8c36..0681feab4a84 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if (fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..ef69c31947bf 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cf01e15a7b16..1516bb779b8d 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -272,7 +273,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; -- GitLab From c8159f9a1ae9c9ea76b389666a37bd9263a761cd Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:50:42 +0300 Subject: [PATCH 0615/1291] can: ems_usb: Fix memory leak on ems_usb_disconnect() commit 72c05f32f4a5055c9c8fe889bb6903ec959c0aad upstream. ems_usb_probe() allocates memory for dev->tx_msg_buffer, but there is no its deallocation in ems_usb_disconnect(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/ems_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b00358297424..d0846ae9e0e4 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1071,6 +1071,7 @@ static void ems_usb_disconnect(struct usb_interface *intf) usb_free_urb(dev->intr_urb); kfree(dev->intr_in_buffer); + kfree(dev->tx_msg_buffer); } } -- GitLab From 45c8178cf69ea21b6d0356b3e541c8e111ba6458 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 27 Jul 2018 22:43:01 +0000 Subject: [PATCH 0616/1291] net: socket: fix potential spectre v1 gadget in socketcall commit c8e8cd579bb4265651df8223730105341e61a2d1 upstream. 'call' is a user-controlled value, so sanitize the array index after the bounds check to avoid speculating past the bounds of the 'nargs' array. Found with the help of Smatch: net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue 'nargs' [r] (local cap) Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 8b2bef6cfe42..d27922639a20 100644 --- a/net/socket.c +++ b/net/socket.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -2443,6 +2444,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; + call = array_index_nospec(call, SYS_SENDMMSG + 1); len = nargs[call]; if (len > sizeof(a)) -- GitLab From 34a938cd3ad45b5c6d67d83329ce1dcf239863a8 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Jul 2018 10:29:28 +0800 Subject: [PATCH 0617/1291] virtio_balloon: fix another race between migration and ballooning commit 89da619bc18d79bca5304724c11d4ba3b67ce2c6 upstream. Kernel panic when with high memory pressure, calltrace looks like, PID: 21439 TASK: ffff881be3afedd0 CPU: 16 COMMAND: "java" #0 [ffff881ec7ed7630] machine_kexec at ffffffff81059beb #1 [ffff881ec7ed7690] __crash_kexec at ffffffff81105942 #2 [ffff881ec7ed7760] crash_kexec at ffffffff81105a30 #3 [ffff881ec7ed7778] oops_end at ffffffff816902c8 #4 [ffff881ec7ed77a0] no_context at ffffffff8167ff46 #5 [ffff881ec7ed77f0] __bad_area_nosemaphore at ffffffff8167ffdc #6 [ffff881ec7ed7838] __node_set at ffffffff81680300 #7 [ffff881ec7ed7860] __do_page_fault at ffffffff8169320f #8 [ffff881ec7ed78c0] do_page_fault at ffffffff816932b5 #9 [ffff881ec7ed78f0] page_fault at ffffffff8168f4c8 [exception RIP: _raw_spin_lock_irqsave+47] RIP: ffffffff8168edef RSP: ffff881ec7ed79a8 RFLAGS: 00010046 RAX: 0000000000000246 RBX: ffffea0019740d00 RCX: ffff881ec7ed7fd8 RDX: 0000000000020000 RSI: 0000000000000016 RDI: 0000000000000008 RBP: ffff881ec7ed79a8 R8: 0000000000000246 R9: 000000000001a098 R10: ffff88107ffda000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000008 R14: ffff881ec7ed7a80 R15: ffff881be3afedd0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 It happens in the pagefault and results in double pagefault during compacting pages when memory allocation fails. Analysed the vmcore, the page leads to second pagefault is corrupted with _mapcount=-256, but private=0. It's caused by the race between migration and ballooning, and lock missing in virtballoon_migratepage() of virtio_balloon driver. This patch fix the bug. Fixes: e22504296d4f64f ("virtio_balloon: introduce migration primitives to balloon pages") Cc: stable@vger.kernel.org Signed-off-by: Jiang Biao Signed-off-by: Huang Chong Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f0b3a0b9d42f..36c9fbf70d44 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -490,7 +490,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, tell_host(vb, vb->inflate_vq); /* balloon's page migration 2nd step -- deflate "page" */ + spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_delete(page); + spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb, vb->pfns, page); tell_host(vb, vb->deflate_vq); -- GitLab From 7cf6b325f5545ba41953e2a49267632ea3a78af8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jul 2018 17:19:19 -0400 Subject: [PATCH 0618/1291] x86/apic: Future-proof the TSC_DEADLINE quirk for SKX commit d9e6dbcf28f383bf08e6a3180972f5722e514a54 upstream. All SKX with stepping higher than 4 support the TSC_DEADLINE, no matter the microcode version. Without this patch, upcoming SKX steppings will not be able to use their TSC_DEADLINE timer. Signed-off-by: Len Brown Cc: # v4.14+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 616dd5872e ("x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping") Link: http://lkml.kernel.org/r/d0c7129e509660be9ec6b233284b8d42d90659e8.1532207856.git.len.brown@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ebdcc368a2d3..f48a51335538 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -580,6 +580,9 @@ static u32 skx_deadline_rev(void) case 0x04: return 0x02000014; } + if (boot_cpu_data.x86_stepping > 4) + return 0; + return ~0U; } -- GitLab From c1a29c2d00c656fcc4abfeec5f4f92762e3c7437 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 22 Jul 2018 11:05:09 -0700 Subject: [PATCH 0619/1291] x86/entry/64: Remove %ebx handling from error_entry/exit commit b3681dd548d06deb2e1573890829dff4b15abf46 upstream. error_entry and error_exit communicate the user vs. kernel status of the frame using %ebx. This is unnecessary -- the information is in regs->cs. Just use regs->cs. This makes error_entry simpler and makes error_exit more robust. It also fixes a nasty bug. Before all the Spectre nonsense, the xen_failsafe_callback entry point returned like this: ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS ENCODE_FRAME_POINTER jmp error_exit And it did not go through error_entry. This was bogus: RBX contained garbage, and error_exit expected a flag in RBX. Fortunately, it generally contained *nonzero* garbage, so the correct code path was used. As part of the Spectre fixes, code was added to clear RBX to mitigate certain speculation attacks. Now, depending on kernel configuration, RBX got zeroed and, when running some Wine workloads, the kernel crashes. This was introduced by: commit 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") With this patch applied, RBX is no longer needed as a flag, and the problem goes away. I suspect that malicious userspace could use this bug to crash the kernel even without the offending patch applied, though. [ Historical note: I wrote this patch as a cleanup before I was aware of the bug it fixed. ] [ Note to stable maintainers: this should probably get applied to all kernels. If you're nervous about that, a more conservative fix to add xorl %ebx,%ebx; incl %ebx before the jump to error_exit should also fix the problem. ] Reported-and-tested-by: M. Vefa Bicakci Signed-off-by: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Dominik Brodowski Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Cc: xen-devel@lists.xenproject.org Fixes: 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") Link: http://lkml.kernel.org/r/b5010a090d3586b2d6e06c7ad3ec5542d1241c45.1532282627.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f7bfa701219b..0fae7096ae23 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -933,7 +933,7 @@ ENTRY(\sym) call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit .endif END(\sym) .endm @@ -1166,7 +1166,6 @@ END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. - * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) UNWIND_HINT_FUNC @@ -1213,7 +1212,6 @@ ENTRY(error_entry) * for these here too. */ .Lerror_kernelspace: - incl %ebx leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) je .Lerror_bad_iret @@ -1247,28 +1245,20 @@ ENTRY(error_entry) /* * Pretend that the exception came from user mode: set up pt_regs - * as if we faulted immediately after IRET and clear EBX so that - * error_exit knows that we will be returning to user mode. + * as if we faulted immediately after IRET. */ mov %rsp, %rdi call fixup_bad_iret mov %rax, %rsp - decl %ebx jmp .Lerror_entry_from_usermode_after_swapgs END(error_entry) - -/* - * On entry, EBX is a "return to kernel mode" flag: - * 1: already in kernel mode, don't need SWAPGS - * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode - */ ENTRY(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - testl %ebx, %ebx - jnz retint_kernel + testb $3, CS(%rsp) + jz retint_kernel jmp retint_user END(error_exit) -- GitLab From e5a16c6a6707f7ec905dc595cd3a7e37402735a2 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 19 Jul 2018 21:59:07 +0300 Subject: [PATCH 0620/1291] kvm: x86: vmx: fix vpid leak commit 63aff65573d73eb8dda4732ad4ef222dd35e4862 upstream. VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested vmx is turned on with the module parameter. However, it's only freed if the L1 guest has executed VMXON which is not a given. As a result, on a system with nested==on every creation+deletion of an L1 vcpu without running an L2 guest results in leaking one vpid. Since the total number of vpids is limited to 64k, they can eventually get exhausted, preventing L2 from starting. Delay allocation of the L2 vpid until VMXON emulation, thus matching its freeing. Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0 Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 90747865205d..8d000fde1414 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7354,6 +7354,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmxon = true; return 0; @@ -9802,10 +9804,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) { + if (nested) nested_vmx_setup_ctls_msrs(vmx); - vmx->nested.vpid02 = allocate_vpid(); - } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -9822,7 +9822,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: - free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); -- GitLab From a1b5bcffe4a4e6740545fe8ab4a3381da148ce89 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 25 Jul 2018 10:26:19 +0800 Subject: [PATCH 0621/1291] audit: fix potential null dereference 'context->module.name' commit b305f7ed0f4f494ad6f3ef5667501535d5a8fa31 upstream. The variable 'context->module.name' may be null pointer when kmalloc return null, so it's better to check it before using to avoid null dereference. Another one more thing this patch does is using kstrdup instead of (kmalloc + strcpy), and signal a lost record via audit_log_lost. Cc: stable@vger.kernel.org # 4.11 Signed-off-by: Yi Wang Reviewed-by: Jiang Biao Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/auditsc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 677053a2fb57..76d789d6cea0 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1274,8 +1274,12 @@ static void show_special(struct audit_context *context, int *call_panic) break; case AUDIT_KERN_MODULE: audit_log_format(ab, "name="); - audit_log_untrustedstring(ab, context->module.name); - kfree(context->module.name); + if (context->module.name) { + audit_log_untrustedstring(ab, context->module.name); + kfree(context->module.name); + } else + audit_log_format(ab, "(null)"); + break; } audit_log_end(ab); @@ -2387,8 +2391,9 @@ void __audit_log_kern_module(char *name) { struct audit_context *context = current->audit_context; - context->module.name = kmalloc(strlen(name) + 1, GFP_KERNEL); - strcpy(context->module.name, name); + context->module.name = kstrdup(name, GFP_KERNEL); + if (!context->module.name) + audit_log_lost("out of memory in __audit_log_kern_module"); context->type = AUDIT_KERN_MODULE; } -- GitLab From 0eba9f5d3d4829fe5cc53abe90a73c33f47405af Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Aug 2018 15:36:09 -0700 Subject: [PATCH 0622/1291] userfaultfd: remove uffd flags from vma->vm_flags if UFFD_EVENT_FORK fails commit 31e810aa1033a7db50a2746cd34a2432237f6420 upstream. The fix in commit 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") cleared the vma->vm_userfaultfd_ctx but kept userfaultfd flags in vma->vm_flags that were copied from the parent process VMA. As the result, there is an inconsistency between the values of vma->vm_userfaultfd_ctx.ctx and vma->vm_flags which triggers BUG_ON in userfaultfd_release(). Clearing the uffd flags from vma->vm_flags in case of UFFD_EVENT_FORK failure resolves the issue. Link: http://lkml.kernel.org/r/1532931975-25473-1-git-send-email-rppt@linux.vnet.ibm.com Fixes: 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") Signed-off-by: Mike Rapoport Reported-by: syzbot+121be635a7a35ddb7dcb@syzkaller.appspotmail.com Cc: Andrea Arcangeli Cc: Eric Biggers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6ed92524a03..3eda623e4cb4 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -628,8 +628,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); -- GitLab From 4aa0acf290643c8161a0b6af4b5083a000124ff1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Jul 2018 13:43:56 +0300 Subject: [PATCH 0623/1291] iwlwifi: add more card IDs for 9000 series commit 0a5257bc6d89c2ae69b9bf955679cb4f89261874 upstream. Add new device IDs for the 9000 series. Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Emmanuel Grumbach Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 69 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 5 ++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 22 ++++++ 3 files changed, 96 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index 73da5e63a609..2c80c722feca 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -177,6 +177,17 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; +const struct iwl_cfg iwl9260_killer_2ac_cfg = { + .name = "Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)", + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9270_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9270", .fw_name_pre = IWL9260A_FW_PRE, @@ -266,6 +277,34 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = { .soc_latency = 5000, }; +const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = { + .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, +}; + +const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc = { + .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, +}; + const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000A_FW_PRE, @@ -326,6 +365,36 @@ const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = { .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK }; +const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = { + .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, + .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK +}; + +const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk = { + .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", + .fw_name_pre = IWL9000A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, + .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, + .soc_latency = 5000, + .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK +}; + MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 85fe1a928adc..70f3c327eb4a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -470,6 +470,7 @@ extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9260_killer_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwl9560_2ac_cfg; @@ -477,10 +478,14 @@ extern const struct iwl_cfg iwl9460_2ac_cfg_soc; extern const struct iwl_cfg iwl9461_2ac_cfg_soc; extern const struct iwl_cfg iwl9462_2ac_cfg_soc; extern const struct iwl_cfg iwl9560_2ac_cfg_soc; +extern const struct iwl_cfg iwl9560_killer_2ac_cfg_soc; +extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc; extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk; +extern const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk; +extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk; extern const struct iwl_cfg iwla000_2ac_cfg_hr; extern const struct iwl_cfg iwla000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwla000_2ac_cfg_jf; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 9a8605abb00a..4cbc6cb8bf89 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -543,6 +543,9 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x1550, iwl9260_killer_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)}, @@ -552,6 +555,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, @@ -576,6 +580,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)}, @@ -602,6 +608,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -628,6 +636,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x31DC, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x1030, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x1551, iwl9560_killer_s_2ac_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x1552, iwl9560_killer_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)}, @@ -654,6 +664,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x34F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x34F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x34F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -680,6 +692,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -706,6 +720,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -741,6 +757,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)}, @@ -769,6 +787,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)}, @@ -795,6 +815,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA370, 0x1010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)}, -- GitLab From 65be9cbe1224f1fe51591b44e3d35603b81b595c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 11 Jul 2018 11:23:52 +0300 Subject: [PATCH 0624/1291] RDMA/uverbs: Expand primary and alt AV port checks commit addb8a6559f0f8b5a37582b7ca698358445a55bf upstream. The commit cited below checked that the port numbers provided in the primary and alt AVs are legal. That is sufficient to prevent a kernel panic. However, it is not sufficient for correct operation. In Linux, AVs (both primary and alt) must be completely self-described. We do not accept an AV from userspace without an embedded port number. (This has been the case since kernel 3.14 commit dbf727de7440 ("IB/core: Use GID table in AH creation and dmac resolution")). For the primary AV, this embedded port number must match the port number specified with IB_QP_PORT. We also expect the port number embedded in the alt AV to match the alt_port_num value passed by the userspace driver in the modify_qp command base structure. Add these checks to modify_qp. Cc: # 4.16 Fixes: 5d4c05c3ee36 ("RDMA/uverbs: Sanitize user entered port numbers prior to access it") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 59 +++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b8229d7b0ff5..f836ed1dd300 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1981,15 +1981,64 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } - if ((cmd->base.attr_mask & IB_QP_AV) && - !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { - ret = -EINVAL; - goto release_qp; + if ((cmd->base.attr_mask & IB_QP_AV)) { + if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + + if (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state == IB_QPS_RTR) { + /* We are in INIT->RTR TRANSITION (if we are not, + * this transition will be rejected in subsequent checks). + * In the INIT->RTR transition, we cannot have IB_QP_PORT set, + * but the IB_QP_STATE flag is required. + * + * Since kernel 3.14 (commit dbf727de7440), the uverbs driver, + * when IB_QP_AV is set, has required inclusion of a valid + * port number in the primary AV. (AVs are created and handled + * differently for infiniband and ethernet (RoCE) ports). + * + * Check the port number included in the primary AV against + * the port number in the qp struct, which was set (and saved) + * in the RST->INIT transition. + */ + if (cmd->base.dest.port_num != qp->real_qp->port) { + ret = -EINVAL; + goto release_qp; + } + } else { + /* We are in SQD->SQD. (If we are not, this transition will + * be rejected later in the verbs layer checks). + * Check for both IB_QP_PORT and IB_QP_AV, these can be set + * together in the SQD->SQD transition. + * + * If only IP_QP_AV was set, add in IB_QP_PORT as well (the + * verbs layer driver does not track primary port changes + * resulting from path migration. Thus, in SQD, if the primary + * AV is modified, the primary port should also be modified). + * + * Note that in this transition, the IB_QP_STATE flag + * is not allowed. + */ + if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == (IB_QP_AV | IB_QP_PORT)) && + cmd->base.port_num != cmd->base.dest.port_num) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == IB_QP_AV) { + cmd->base.attr_mask |= IB_QP_PORT; + cmd->base.port_num = cmd->base.dest.port_num; + } + } } if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || - !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) || + cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) { ret = -EINVAL; goto release_qp; } -- GitLab From 51ef850c78bb311e7bc5e6aada5756fbf71d6b08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jul 2018 16:12:32 +0800 Subject: [PATCH 0625/1291] crypto: padlock-aes - Fix Nano workaround data corruption commit 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 upstream. This was detected by the self-test thanks to Ard's chunking patch. I finally got around to testing this out on my ancient Via box. It turns out that the workaround got the assembly wrong and we end up doing count + initial cycles of the loop instead of just count. This obviously causes corruption, either by overwriting the source that is yet to be processed, or writing over the end of the buffer. On CPUs that don't require the workaround only ECB is affected. On Nano CPUs both ECB and CBC are affected. This patch fixes it by doing the subtraction prior to the assembly. Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...") Cc: Reported-by: Jamie Heilman Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/padlock-aes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c939f18f70cc..7685f557dcc0 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, return; } + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) @@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) - : "d"(control_word), "b"(key), "c"(count - initial)); + : "d"(control_word), "b"(key), "c"(count)); } static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, @@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, if (count < cbc_fetch_blocks) return cbc_crypt(input, output, key, iv, control_word, count); + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) @@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) - : "d" (control_word), "b" (key), "c" (count-initial)); + : "d" (control_word), "b" (key), "c" (count)); return iv; } -- GitLab From 1a08888316d2d69d772391597fadde90d1cf1483 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:36:01 +0200 Subject: [PATCH 0626/1291] drm/vc4: Reset ->{x, y}_scaling[1] when dealing with uniplanar formats commit a6a00918d4ad8718c3ccde38c02cec17f116b2fd upstream. This is needed to ensure ->is_unity is correct when the plane was previously configured to output a multi-planar format with scaling enabled, and is then being reconfigured to output a uniplanar format. Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180724133601.32114-1-boris.brezillon@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 77c56264c05b..17590cb2b80d 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -352,6 +352,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->x_scaling[0] = VC4_SCALING_TPZ; if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + } else { + vc4_state->x_scaling[1] = VC4_SCALING_NONE; + vc4_state->y_scaling[1] = VC4_SCALING_NONE; } vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && -- GitLab From b4653a3ea3d7e68d4118f0476c72f70288c6c7f4 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 12 Jul 2018 16:30:45 -0400 Subject: [PATCH 0627/1291] scsi: sg: fix minor memory leak in error path commit c170e5a8d222537e98aa8d4fddb667ff7a2ee114 upstream. Fix a minor memory leak when there is an error opening a /dev/sg device. Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling") Cc: Reviewed-by: Ewan D. Milne Signed-off-by: Tony Battersby Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4d49fb8f2bbc..3a406b40f150 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2186,6 +2186,7 @@ sg_add_sfp(Sg_device * sdp) write_lock_irqsave(&sdp->sfd_lock, iflags); if (atomic_read(&sdp->detaching)) { write_unlock_irqrestore(&sdp->sfd_lock, iflags); + kfree(sfp); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); -- GitLab From 2ae6c0413b4768f9d8fc6f718a732f9dae014b67 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Aug 2018 16:20:52 +0200 Subject: [PATCH 0628/1291] Linux 4.14.61 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b48ec630990..4bd65eabd298 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 60 +SUBLEVEL = 61 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 43d7c954b22e87f16a8304518d776f454f584578 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:51 -0700 Subject: [PATCH 0629/1291] scsi: qla2xxx: Fix unintialized List head crash commit e3dde080ebbdbb4bda8eee35d770714fee8c59ac upstream. In case of IOCB Queue full or system where memory is low and driver receives large number of RSCN storm, the stale sp pointer can stay on gpnid_list resulting in page_fault. This patch fixes this issue by initializing the sp->elem list head and removing sp->elem before memory is freed. Following stack trace is seen 9 [ffff987b37d1bc60] page_fault at ffffffffad516768 [exception RIP: qla24xx_async_gpnid+496] 10 [ffff987b37d1bd10] qla24xx_async_gpnid at ffffffffc039866d [qla2xxx] 11 [ffff987b37d1bd80] qla2x00_do_work at ffffffffc036169c [qla2xxx] 12 [ffff987b37d1be38] qla2x00_do_dpc_all_vps at ffffffffc03adfed [qla2xxx] 13 [ffff987b37d1be78] qla2x00_do_dpc at ffffffffc036458a [qla2xxx] 14 [ffff987b37d1bec8] kthread at ffffffffacebae31 Fixes: 2d73ac6102d9 ("scsi: qla2xxx: Serialize GPNID for multiple RSCN") Cc: # v4.17+ Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_gs.c | 4 ++++ drivers/scsi/qla2xxx/qla_inline.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 59ecc4eda6cd..2a19ec0660cb 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3368,6 +3368,10 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) return rval; done_free_sp: + spin_lock_irqsave(&vha->hw->vport_slock, flags); + list_del(&sp->elem); + spin_unlock_irqrestore(&vha->hw->vport_slock, flags); + if (sp->u.iocb_cmd.u.ctarg.req) { dma_free_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 9a2c86eacf44..3f5a0f0f8b62 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -221,6 +221,8 @@ qla2xxx_get_qpair_sp(struct qla_qpair *qpair, fc_port_t *fcport, gfp_t flag) sp->fcport = fcport; sp->iocbs = 1; sp->vha = qpair->vha; + INIT_LIST_HEAD(&sp->elem); + done: if (!sp) QLA_QPAIR_MARK_NOT_BUSY(qpair); -- GitLab From 01cda405c88b5ce1ff8c7d4006ec23ade2b0a507 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:52 -0700 Subject: [PATCH 0630/1291] scsi: qla2xxx: Fix NPIV deletion by calling wait_for_sess_deletion commit efa93f48fa9d423fda166bc3b6c0cbb09682492e upstream. Add wait for session deletion to finish before freeing an NPIV scsi host. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 1 + drivers/scsi/qla2xxx/qla_gbl.h | 1 + drivers/scsi/qla2xxx/qla_mid.c | 5 +++++ drivers/scsi/qla2xxx/qla_os.c | 2 +- 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 9ce28c4f9812..b09d29931393 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2142,6 +2142,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) msleep(1000); qla24xx_disable_vp(vha); + qla2x00_wait_for_sess_deletion(vha); vha->flags.delete_progress = 1; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index f852ca60c49f..89706341514e 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -200,6 +200,7 @@ void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); int qla24xx_async_abort_cmd(srb_t *); +void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *); /* * Global Functions in qla_mid.c source file. diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index d77dde89118e..375a88e18afe 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -152,10 +152,15 @@ qla24xx_disable_vp(scsi_qla_host_t *vha) { unsigned long flags; int ret; + fc_port_t *fcport; ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); atomic_set(&vha->loop_state, LOOP_DOWN); atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->logout_on_delete = 0; + + qla2x00_mark_all_devices_lost(vha, 0); /* Remove port id from vp target map */ spin_lock_irqsave(&vha->hw->vport_slock, flags); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 1be76695e692..6dbc2ab9090b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1136,7 +1136,7 @@ static inline int test_fcport_count(scsi_qla_host_t *vha) * qla2x00_wait_for_sess_deletion can only be called from remove_one. * it has dependency on UNLOADING flag to stop device discovery */ -static void +void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) { qla2x00_mark_all_devices_lost(vha, 0); -- GitLab From f70766f1338d534638914388529cd6a7263db858 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:54 -0700 Subject: [PATCH 0631/1291] scsi: qla2xxx: Fix ISP recovery on unload commit b08abbd9f5996309f021684f9ca74da30dcca36a upstream. During unload process, the chip can encounter problem where a FW dump would be captured. For this case, the full reset sequence will be skip to bring the chip back to full operational state. Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 6dbc2ab9090b..7d7fb5bbb600 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5794,8 +5794,9 @@ qla2x00_do_dpc(void *data) set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); } - if (test_and_clear_bit(ISP_ABORT_NEEDED, - &base_vha->dpc_flags)) { + if (test_and_clear_bit + (ISP_ABORT_NEEDED, &base_vha->dpc_flags) && + !test_bit(UNLOADING, &base_vha->dpc_flags)) { ql_dbg(ql_dbg_dpc, base_vha, 0x4007, "ISP abort scheduled.\n"); -- GitLab From a96feef5b07141f304a3e933da2abbd8dfdf5bf1 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 18 Jul 2018 14:29:55 -0700 Subject: [PATCH 0632/1291] scsi: qla2xxx: Return error when TMF returns commit b4146c4929ef61d5afca011474d59d0918a0cd82 upstream. Propagate the task management completion status properly to avoid unnecessary waits for commands to complete. Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling") Cc: Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index bcde6130f121..1d42d38f5a45 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1326,11 +1326,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, wait_for_completion(&tm_iocb->u.tmf.comp); - rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ? - QLA_SUCCESS : QLA_FUNCTION_FAILED; + rval = tm_iocb->u.tmf.data; - if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) { - ql_dbg(ql_dbg_taskm, vha, 0x8030, + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x8030, "TM IOCB failed (%x).\n", rval); } -- GitLab From a6d9dacf4e4487192410435cb1e063c3b79b18c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Aug 2018 14:44:59 +0200 Subject: [PATCH 0633/1291] genirq: Make force irq threading setup more robust commit d1f0301b3333eef5efbfa1fe0f0edbea01863d5d upstream. The support of force threading interrupts which are set up with both a primary and a threaded handler wreckaged the setup of regular requested threaded interrupts (primary handler == NULL). The reason is that it does not check whether the primary handler is set to the default handler which wakes the handler thread. Instead it replaces the thread handler with the primary handler as it would do with force threaded interrupts which have been requested via request_irq(). So both the primary and the thread handler become the same which then triggers the warnon that the thread handler tries to wakeup a not configured secondary thread. Fortunately this only happens when the driver omits the IRQF_ONESHOT flag when requesting the threaded interrupt, which is normaly caught by the sanity checks when force irq threading is disabled. Fix it by skipping the force threading setup when a regular threaded interrupt is requested. As a consequence the interrupt request which lacks the IRQ_ONESHOT flag is rejected correctly instead of silently wreckaging it. Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler") Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Tested-by: Kurt Kanzenbach Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b02caa442776..069311541577 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1030,6 +1030,13 @@ static int irq_setup_forced_threading(struct irqaction *new) if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; + new->flags |= IRQF_ONESHOT; /* @@ -1037,7 +1044,7 @@ static int irq_setup_forced_threading(struct irqaction *new) * thread handler. We force thread them as well by creating a * secondary action. */ - if (new->handler != irq_default_primary_handler && new->thread_fn) { + if (new->handler && new->thread_fn) { /* Allocate the secondary action */ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!new->secondary) -- GitLab From e5bcbedadfd9646c862062071ade0612a78a70e3 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 31 Jul 2018 18:13:58 +0200 Subject: [PATCH 0634/1291] nohz: Fix local_timer_softirq_pending() commit 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e upstream. local_timer_softirq_pending() checks whether the timer softirq is pending with: local_softirq_pending() & TIMER_SOFTIRQ. This is wrong because TIMER_SOFTIRQ is the softirq number and not a bitmask. So the test checks for the wrong bit. Use BIT(TIMER_SOFTIRQ) instead. Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Reviewed-by: Daniel Bristot de Oliveira Acked-by: Frederic Weisbecker Cc: bigeasy@linutronix.de Cc: peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index bb2af74e6b62..ea3c062e7e1c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -676,7 +676,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & TIMER_SOFTIRQ; + return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, -- GitLab From 2d898915ccf4838c04531c51a598469e921a5eb5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 Aug 2018 15:31:34 +0200 Subject: [PATCH 0635/1291] nohz: Fix missing tick reprogram when interrupting an inline softirq commit 0a0e0829f990120cef165bbb804237f400953ec2 upstream. The full nohz tick is reprogrammed in irq_exit() only if the exit is not in a nesting interrupt. This stands as an optimization: whether a hardirq or a softirq is interrupted, the tick is going to be reprogrammed when necessary at the end of the inner interrupt, with even potential new updates on the timer queue. When soft interrupts are interrupted, it's assumed that they are executing on the tail of an interrupt return. In that case tick_nohz_irq_exit() is called after softirq processing to take care of the tick reprogramming. But the assumption is wrong: softirqs can be processed inline as well, ie: outside of an interrupt, like in a call to local_bh_enable() or from ksoftirqd. Inline softirqs don't reprogram the tick once they are done, as opposed to interrupt tail softirq processing. So if a tick interrupts an inline softirq processing, the next timer will neither be reprogrammed from the interrupting tick's irq_exit() nor after the interrupted softirq processing. This situation may leave the tick unprogrammed while timers are armed. To fix this, simply keep reprogramming the tick even if a softirq has been interrupted. That can be optimized further, but for now correctness is more important. Note that new timers enqueued in nohz_full mode after a softirq gets interrupted will still be handled just fine through self-IPIs triggered by the timer code. Reported-by: Anna-Maria Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Anna-Maria Gleixner Cc: stable@vger.kernel.org # 4.14+ Link: https://lkml.kernel.org/r/1533303094-15855-1-git-send-email-frederic@kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index e89c3b0cff6d..f40ac7191257 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -382,7 +382,7 @@ static inline void tick_irq_exit(void) /* Make sure that timer wheel updates are propagated */ if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { - if (!in_interrupt()) + if (!in_irq()) tick_nohz_irq_exit(); } #endif -- GitLab From 09901e570c9e9d6bf302716edd3a5a14417a0657 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sun, 5 Aug 2018 01:35:53 +0100 Subject: [PATCH 0636/1291] netlink: Don't shift on 64 for ngroups commit 91874ecf32e41b5d86a4cb9d60e0bee50d828058 upstream. It's legal to have 64 groups for netlink_sock. As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe only to first 32 groups. The check for correctness of .bind() userspace supplied parameter is done by applying mask made from ngroups shift. Which broke Android as they have 64 groups and the shift for mask resulted in an overflow. Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups") Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Reported-and-Tested-by: Nathan Chancellor Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 68c9d1833b95..c67abda5d639 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -981,8 +981,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups == 0) groups = 0; - else - groups &= (1ULL << nlk->ngroups) - 1; + else if (nlk->ngroups < 8*sizeof(groups)) + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- GitLab From dd69abaccbf78ba645ba46604a59b944695a2cb8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 8 Jul 2018 19:35:02 -0400 Subject: [PATCH 0637/1291] ext4: fix false negatives *and* false positives in ext4_check_descriptors() commit 44de022c4382541cebdd6de4465d1f4f465ff1dd upstream. Ext4_check_descriptors() was getting called before s_gdb_count was initialized. So for file systems w/o the meta_bg feature, allocation bitmaps could overlap the block group descriptors and ext4 wouldn't notice. For file systems with the meta_bg feature enabled, there was a fencepost error which would cause the ext4_check_descriptors() to incorrectly believe that the block allocation bitmap overlaps with the block group descriptor blocks, and it would reject the mount. Fix both of these problems. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Benjamin Gilbert Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6b0c1ea95196..f30d2bf40471 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2301,7 +2301,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; - ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0); ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -4038,13 +4038,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } + sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; } - sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); -- GitLab From ff28e5cc58c2015ae65ad0070a3c90898f69aa3b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 14 Sep 2017 16:50:14 +0200 Subject: [PATCH 0638/1291] ACPI / PCI: Bail early in acpi_pci_add_bus() if there is no ACPI handle commit a0040c0145945d3bd203df8fa97f6dfa819f3f7d upstream. Hyper-V instances support PCI pass-through which is implemented through PV pci-hyperv driver. When a device is passed through, a new root PCI bus is created in the guest. The bus sits on top of VMBus and has no associated information in ACPI. acpi_pci_add_bus() in this case proceeds all the way to acpi_evaluate_dsm(), which reports ACPI: \: failed to evaluate _DSM (0x1001) While acpi_pci_slot_enumerate() and acpiphp_enumerate_slots() are protected against ACPI_HANDLE() being NULL and do nothing, acpi_evaluate_dsm() is not and gives us the error. It seems the correct fix is to not do anything in acpi_pci_add_bus() in such cases. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Bjorn Helgaas Cc: Sinan Kaya Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a8da543b3814..4708eb9df71b 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -624,7 +624,7 @@ void acpi_pci_add_bus(struct pci_bus *bus) union acpi_object *obj; struct pci_host_bridge *bridge; - if (acpi_pci_disabled || !bus->bridge) + if (acpi_pci_disabled || !bus->bridge || !ACPI_HANDLE(bus->bridge)) return; acpi_pci_slot_enumerate(bus); -- GitLab From 60baabc37bc18323e6346d28fd7b70729e345703 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 14 Jul 2018 01:28:15 +0900 Subject: [PATCH 0639/1291] ring_buffer: tracing: Inherit the tracing setting to next ring buffer commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream. Maintain the tracing on/off setting of the ring_buffer when switching to the trace buffer snapshot. Taking a snapshot is done by swapping the backup ring buffer (max_tr_buffer). But since the tracing on/off setting is defined by the ring buffer, when swapping it, the tracing on/off setting can also be changed. This causes a strange result like below: /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 0 > tracing_on /sys/kernel/debug/tracing # cat tracing_on 0 /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 0 We don't touch tracing_on, but snapshot changes tracing_on setting each time. This is an anomaly, because user doesn't know that each "ring_buffer" stores its own tracing-enable state and the snapshot is done by swapping ring buffers. Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox Cc: Ingo Molnar Cc: Shuah Khan Cc: Tom Zanussi Cc: Hiraku Toyooka Cc: stable@vger.kernel.org Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace") Signed-off-by: Masami Hiramatsu [ Updated commit log and comment in the code ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 16 ++++++++++++++++ kernel/trace/trace.c | 6 ++++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 289e4d54e3e0..5caa062a02b2 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -160,6 +160,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer); void ring_buffer_record_off(struct ring_buffer *buffer); void ring_buffer_record_on(struct ring_buffer *buffer); int ring_buffer_record_is_on(struct ring_buffer *buffer); +int ring_buffer_record_is_set_on(struct ring_buffer *buffer); void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 36f018b15392..fd7809004297 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3109,6 +3109,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer) return !atomic_read(&buffer->record_disabled); } +/** + * ring_buffer_record_is_set_on - return true if the ring buffer is set writable + * @buffer: The ring buffer to see if write is set enabled + * + * Returns true if the ring buffer is set writable by ring_buffer_record_on(). + * Note that this does NOT mean it is in a writable state. + * + * It may return true when the ring buffer has been disabled by + * ring_buffer_record_disable(), as that is a temporary disabling of + * the ring buffer. + */ +int ring_buffer_record_is_set_on(struct ring_buffer *buffer) +{ + return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); +} + /** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e268750bd4ad..20919489883f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1366,6 +1366,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); + /* Inherit the recordable setting from trace_buffer */ + if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + ring_buffer_record_on(tr->max_buffer.buffer); + else + ring_buffer_record_off(tr->max_buffer.buffer); + buf = tr->trace_buffer.buffer; tr->trace_buffer.buffer = tr->max_buffer.buffer; tr->max_buffer.buffer = buf; -- GitLab From ea464580fef7a334d19f27538bc690168a821b5d Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Mon, 9 Jul 2018 11:43:01 +0200 Subject: [PATCH 0640/1291] i2c: imx: Fix reinit_completion() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream. Make sure to call reinit_completion() before dma is started to avoid race condition where reinit_completion() is called after complete() and before wait_for_completion_timeout(). Signed-off-by: Esben Haabendal Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver") Reviewed-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index f96830ffd9f1..75c6b98585ba 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -376,6 +376,7 @@ static int i2c_imx_dma_xfer(struct imx_i2c_struct *i2c_imx, goto err_desc; } + reinit_completion(&dma->cmd_complete); txdesc->callback = i2c_imx_dma_callback; txdesc->callback_param = i2c_imx; if (dma_submit_error(dmaengine_submit(txdesc))) { @@ -619,7 +620,6 @@ static int i2c_imx_dma_write(struct imx_i2c_struct *i2c_imx, * The first byte must be transmitted by the CPU. */ imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR); - reinit_completion(&i2c_imx->dma->cmd_complete); time_left = wait_for_completion_timeout( &i2c_imx->dma->cmd_complete, msecs_to_jiffies(DMA_TIMEOUT)); @@ -678,7 +678,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, if (result) return result; - reinit_completion(&i2c_imx->dma->cmd_complete); time_left = wait_for_completion_timeout( &i2c_imx->dma->cmd_complete, msecs_to_jiffies(DMA_TIMEOUT)); -- GitLab From 0ea7fcfc7fe63de52eacb482c1ab47906b2d1251 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jul 2018 01:36:43 +0100 Subject: [PATCH 0641/1291] Btrfs: fix file data corruption after cloning a range and fsync commit bd3599a0e142cd73edd3b6801068ac3f48ac771a upstream. When we clone a range into a file we can end up dropping existing extent maps (or trimming them) and replacing them with new ones if the range to be cloned overlaps with a range in the destination inode. When that happens we add the new extent maps to the list of modified extents in the inode's extent map tree, so that a "fast" fsync (the flag BTRFS_INODE_NEEDS_FULL_SYNC not set in the inode) will see the extent maps and log corresponding extent items. However, at the end of range cloning operation we do truncate all the pages in the affected range (in order to ensure future reads will not get stale data). Sometimes this truncation will release the corresponding extent maps besides the pages from the page cache. If this happens, then a "fast" fsync operation will miss logging some extent items, because it relies exclusively on the extent maps being present in the inode's extent tree, leading to data loss/corruption if the fsync ends up using the same transaction used by the clone operation (that transaction was not committed in the meanwhile). An extent map is released through the callback btrfs_invalidatepage(), which gets called by truncate_inode_pages_range(), and it calls __btrfs_releasepage(). The later ends up calling try_release_extent_mapping() which will release the extent map if some conditions are met, like the file size being greater than 16Mb, gfp flags allow blocking and the range not being locked (which is the case during the clone operation) nor being the extent map flagged as pinned (also the case for cloning). The following example, turned into a test for fstests, reproduces the issue: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0x18 9000K 6908K" /mnt/foo $ xfs_io -f -c "pwrite -S 0x20 2572K 156K" /mnt/bar $ xfs_io -c "fsync" /mnt/bar # reflink destination offset corresponds to the size of file bar, # 2728Kb minus 4Kb. $ xfs_io -c ""reflink ${SCRATCH_MNT}/foo 0 2724K 15908K" /mnt/bar $ xfs_io -c "fsync" /mnt/bar $ md5sum /mnt/bar 95a95813a8c2abc9aa75a6c2914a077e /mnt/bar $ mount /dev/sdb /mnt $ md5sum /mnt/bar 207fd8d0b161be8a84b945f0df8d5f8d /mnt/bar # digest should be 95a95813a8c2abc9aa75a6c2914a077e like before the # power failure In the above example, the destination offset of the clone operation corresponds to the size of the "bar" file minus 4Kb. So during the clone operation, the extent map covering the range from 2572Kb to 2728Kb gets trimmed so that it ends at offset 2724Kb, and a new extent map covering the range from 2724Kb to 11724Kb is created. So at the end of the clone operation when we ask to truncate the pages in the range from 2724Kb to 2724Kb + 15908Kb, the page invalidation callback ends up removing the new extent map (through try_release_extent_mapping()) when the page at offset 2724Kb is passed to that callback. Fix this by setting the bit BTRFS_INODE_NEEDS_FULL_SYNC whenever an extent map is removed at try_release_extent_mapping(), forcing the next fsync to search for modified extents in the fs/subvolume tree instead of relying on the presence of extent maps in memory. This way we can continue doing a "fast" fsync if the destination range of a clone operation does not overlap with an existing range or if any of the criteria necessary to remove an extent map at try_release_extent_mapping() is not met (file size not bigger then 16Mb or gfp flags do not allow blocking). CC: stable@vger.kernel.org # 3.16+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7fa50e12f18e..5b62e06567a3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4280,6 +4280,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, struct extent_map *em; u64 start = page_offset(page); u64 end = start + PAGE_SIZE - 1; + struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host); if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > SZ_16M) { @@ -4302,6 +4303,8 @@ int try_release_extent_mapping(struct extent_map_tree *map, extent_map_end(em) - 1, EXTENT_LOCKED | EXTENT_WRITEBACK, 0, NULL)) { + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &btrfs_inode->runtime_flags); remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); -- GitLab From 12c058df827213dcbbc42645fd5bcafb4057fb27 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 14 Jan 2018 12:39:01 +0200 Subject: [PATCH 0642/1291] nvme-pci: allocate device queues storage space at probe commit 147b27e4bd08406a6abebedbb478b431ec197be1 upstream. It may cause race by setting 'nvmeq' in nvme_init_request() because .init_request is called inside switching io scheduler, which may happen when the NVMe device is being resetted and its nvme queues are being freed and created. We don't have any sync between the two pathes. This patch changes the nvmeq allocation to occur at probe time so there is no way we can dereference it at init_request. [ 93.268391] kernel BUG at drivers/nvme/host/pci.c:408! [ 93.274146] invalid opcode: 0000 [#1] SMP [ 93.278618] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc ipmi_ssif vfat fat intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel iTCO_wdt intel_cstate ipmi_si iTCO_vendor_support intel_uncore mxm_wmi mei_me ipmi_devintf intel_rapl_perf pcspkr sg ipmi_msghandler lpc_ich dcdbas mei shpchp acpi_power_meter wmi dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci nvme libata crc32c_intel nvme_core tg3 megaraid_sas ptp i2c_core pps_core dm_mirror dm_region_hash dm_log dm_mod [ 93.349071] CPU: 5 PID: 1842 Comm: sh Not tainted 4.15.0-rc2.ming+ #4 [ 93.356256] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017 [ 93.364801] task: 00000000fb8abf2a task.stack: 0000000028bd82d1 [ 93.371408] RIP: 0010:nvme_init_request+0x36/0x40 [nvme] [ 93.377333] RSP: 0018:ffffc90002537ca8 EFLAGS: 00010246 [ 93.383161] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000008 [ 93.391122] RDX: 0000000000000000 RSI: ffff880276ae0000 RDI: ffff88047bae9008 [ 93.399084] RBP: ffff88047bae9008 R08: ffff88047bae9008 R09: 0000000009dabc00 [ 93.407045] R10: 0000000000000004 R11: 000000000000299c R12: ffff880186bc1f00 [ 93.415007] R13: ffff880276ae0000 R14: 0000000000000000 R15: 0000000000000071 [ 93.422969] FS: 00007f33cf288740(0000) GS:ffff88047ba80000(0000) knlGS:0000000000000000 [ 93.431996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 93.438407] CR2: 00007f33cf28e000 CR3: 000000047e5bb006 CR4: 00000000001606e0 [ 93.446368] Call Trace: [ 93.449103] blk_mq_alloc_rqs+0x231/0x2a0 [ 93.453579] blk_mq_sched_alloc_tags.isra.8+0x42/0x80 [ 93.459214] blk_mq_init_sched+0x7e/0x140 [ 93.463687] elevator_switch+0x5a/0x1f0 [ 93.467966] ? elevator_get.isra.17+0x52/0xc0 [ 93.472826] elv_iosched_store+0xde/0x150 [ 93.477299] queue_attr_store+0x4e/0x90 [ 93.481580] kernfs_fop_write+0xfa/0x180 [ 93.485958] __vfs_write+0x33/0x170 [ 93.489851] ? __inode_security_revalidate+0x4c/0x60 [ 93.495390] ? selinux_file_permission+0xda/0x130 [ 93.500641] ? _cond_resched+0x15/0x30 [ 93.504815] vfs_write+0xad/0x1a0 [ 93.508512] SyS_write+0x52/0xc0 [ 93.512113] do_syscall_64+0x61/0x1a0 [ 93.516199] entry_SYSCALL64_slow_path+0x25/0x25 [ 93.521351] RIP: 0033:0x7f33ce96aab0 [ 93.525337] RSP: 002b:00007ffe57570238 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 93.533785] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f33ce96aab0 [ 93.541746] RDX: 0000000000000006 RSI: 00007f33cf28e000 RDI: 0000000000000001 [ 93.549707] RBP: 00007f33cf28e000 R08: 000000000000000a R09: 00007f33cf288740 [ 93.557669] R10: 00007f33cf288740 R11: 0000000000000246 R12: 00007f33cec42400 [ 93.565630] R13: 0000000000000006 R14: 0000000000000001 R15: 0000000000000000 [ 93.573592] Code: 4c 8d 40 08 4c 39 c7 74 16 48 8b 00 48 8b 04 08 48 85 c0 74 16 48 89 86 78 01 00 00 31 c0 c3 8d 4a 01 48 63 c9 48 c1 e1 03 eb de <0f> 0b 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 85 f6 53 48 89 [ 93.594676] RIP: nvme_init_request+0x36/0x40 [nvme] RSP: ffffc90002537ca8 [ 93.602273] ---[ end trace 810dde3993e5f14e ]--- Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jon Derrick Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 61 +++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f5643d107cc6..555d72020e27 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -77,7 +77,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); * Represents an NVM Express device. Each nvme_dev is a PCI function. */ struct nvme_dev { - struct nvme_queue **queues; + struct nvme_queue *queues; struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; @@ -348,7 +348,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); @@ -370,7 +370,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; + struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; if (!nvmeq->tags) nvmeq->tags = &dev->tagset.tags[hctx_idx]; @@ -386,7 +386,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0; - struct nvme_queue *nvmeq = dev->queues[queue_idx]; + struct nvme_queue *nvmeq = &dev->queues[queue_idx]; BUG_ON(!nvmeq); iod->nvmeq = nvmeq; @@ -900,7 +900,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; struct nvme_command c; memset(&c, 0, sizeof(c)); @@ -1146,7 +1146,6 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) if (nvmeq->sq_cmds) dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); } static void nvme_free_queues(struct nvme_dev *dev, int lowest) @@ -1154,10 +1153,8 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) int i; for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; dev->ctrl.queue_count--; - dev->queues[i] = NULL; - nvme_free_queue(nvmeq); + nvme_free_queue(&dev->queues[i]); } } @@ -1189,10 +1186,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; - if (!nvmeq) - return; if (nvme_suspend_queue(nvmeq)) return; @@ -1246,13 +1241,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } -static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int node) +static int nvme_alloc_queue(struct nvme_dev *dev, int qid, + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, - node); - if (!nvmeq) - return NULL; + struct nvme_queue *nvmeq = &dev->queues[qid]; nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); @@ -1271,17 +1263,15 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_depth = depth; nvmeq->qid = qid; nvmeq->cq_vector = -1; - dev->queues[qid] = nvmeq; dev->ctrl.queue_count++; - return nvmeq; + return 0; free_cqdma: dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: - kfree(nvmeq); - return NULL; + return -ENOMEM; } static int queue_request_irq(struct nvme_queue *nvmeq) @@ -1468,14 +1458,12 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - nvmeq = dev->queues[0]; - if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, - dev_to_node(dev->dev)); - if (!nvmeq) - return -ENOMEM; - } + result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); + if (result) + return result; + nvmeq = &dev->queues[0]; aqa = nvmeq->q_depth - 1; aqa |= aqa << 16; @@ -1505,7 +1493,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { /* vector == qid - 1, match nvme_create_queue */ - if (!nvme_alloc_queue(dev, i, dev->q_depth, + if (nvme_alloc_queue(dev, i, dev->q_depth, pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; @@ -1514,7 +1502,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->ctrl.queue_count - 1); for (i = dev->online_queues; i <= max; i++) { - ret = nvme_create_queue(dev->queues[i], i); + ret = nvme_create_queue(&dev->queues[i], i); if (ret) break; } @@ -1770,7 +1758,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) static int nvme_setup_io_queues(struct nvme_dev *dev) { - struct nvme_queue *adminq = dev->queues[0]; + struct nvme_queue *adminq = &dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); int result, nr_io_queues; unsigned long size; @@ -1896,7 +1884,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) retry: timeout = ADMIN_TIMEOUT; for (; i > 0; i--, sent++) - if (nvme_delete_queue(dev->queues[i], opcode)) + if (nvme_delete_queue(&dev->queues[i], opcode)) break; while (sent--) { @@ -2081,7 +2069,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) queues = dev->online_queues - 1; for (i = dev->ctrl.queue_count - 1; i > 0; i--) - nvme_suspend_queue(dev->queues[i]); + nvme_suspend_queue(&dev->queues[i]); if (dead) { /* A device might become IO incapable very soon during @@ -2089,7 +2077,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * queue_count can be 0 here. */ if (dev->ctrl.queue_count) - nvme_suspend_queue(dev->queues[0]); + nvme_suspend_queue(&dev->queues[0]); } else { nvme_disable_io_queues(dev, queues); nvme_disable_admin_queue(dev, shutdown); @@ -2345,7 +2333,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return -ENOMEM; - dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), + + dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) goto free; -- GitLab From 4af9c61ad953ccc7dbd059a45e77e84db563bd41 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 23 Jan 2018 09:16:19 -0700 Subject: [PATCH 0643/1291] nvme-pci: Fix queue double allocations commit 62314e405fa101dbb82563394f9dfc225e3f1167 upstream. The queue count says the highest queue that's been allocated, so don't reallocate a queue lower than that. Fixes: 147b27e4bd0 ("nvme-pci: allocate device queues storage space at probe") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jon Derrick Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 555d72020e27..a67d03716510 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1246,6 +1246,9 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, { struct nvme_queue *nvmeq = &dev->queues[qid]; + if (dev->ctrl.queue_count > qid) + return 0; + nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) -- GitLab From d626ac9669f29a4cd4d16ccf27349688a6dda2fd Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 16 Jul 2018 14:38:14 -0700 Subject: [PATCH 0644/1291] nvmet-fc: fix target sgl list on large transfers commit d082dc1562a2ff0947b214796f12faaa87e816a9 upstream. The existing code to carve up the sg list expected an sg element-per-page which can be very incorrect with iommu's remapping multiple memory pages to fewer bus addresses. To hit this error required a large io payload (greater than 256k) and a system that maps on a per-page basis. It's possible that large ios could get by fine if the system condensed the sgl list into the first 64 elements. This patch corrects the sg list handling by specifically walking the sg list element by element and attempting to divide the transfer up on a per-sg element boundary. While doing so, it still tries to keep sequences under 256k, but will exceed that rule if a single sg element is larger than 256k. Fixes: 48fa362b6c3f ("nvmet-fc: simplify sg list handling") Cc: # 4.14 Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/fc.c | 44 ++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 8e21211b904b..b7a5d1065378 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -58,8 +58,8 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +/* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) -#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE) enum nvmet_fcp_datadir { NVMET_FCP_NODATA, @@ -74,6 +74,7 @@ struct nvmet_fc_fcp_iod { struct nvme_fc_cmd_iu cmdiubuf; struct nvme_fc_ersp_iu rspiubuf; dma_addr_t rspdma; + struct scatterlist *next_sg; struct scatterlist *data_sg; int data_sg_cnt; u32 total_length; @@ -1000,8 +1001,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->assoc_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); - newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, - template->max_sgl_segments); + newrec->max_sg_cnt = template->max_sgl_segments; ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1717,6 +1717,7 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) ((fod->io_dir == NVMET_FCP_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE)); /* note: write from initiator perspective */ + fod->next_sg = fod->data_sg; return 0; @@ -1874,24 +1875,49 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod, u8 op) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct scatterlist *sg = fod->next_sg; unsigned long flags; - u32 tlen; + u32 remaininglen = fod->total_length - fod->offset; + u32 tlen = 0; int ret; fcpreq->op = op; fcpreq->offset = fod->offset; fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; - tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, - (fod->total_length - fod->offset)); + /* + * for next sequence: + * break at a sg element boundary + * attempt to keep sequence length capped at + * NVMET_FC_MAX_SEQ_LENGTH but allow sequence to + * be longer if a single sg element is larger + * than that amount. This is done to avoid creating + * a new sg list to use for the tgtport api. + */ + fcpreq->sg = sg; + fcpreq->sg_cnt = 0; + while (tlen < remaininglen && + fcpreq->sg_cnt < tgtport->max_sg_cnt && + tlen + sg_dma_len(sg) < NVMET_FC_MAX_SEQ_LENGTH) { + fcpreq->sg_cnt++; + tlen += sg_dma_len(sg); + sg = sg_next(sg); + } + if (tlen < remaininglen && fcpreq->sg_cnt == 0) { + fcpreq->sg_cnt++; + tlen += min_t(u32, sg_dma_len(sg), remaininglen); + sg = sg_next(sg); + } + if (tlen < remaininglen) + fod->next_sg = sg; + else + fod->next_sg = NULL; + fcpreq->transfer_length = tlen; fcpreq->transferred_length = 0; fcpreq->fcp_error = 0; fcpreq->rsplen = 0; - fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE]; - fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE); - /* * If the last READDATA request: check if LLDD supports * combined xfr with response. -- GitLab From a34399927da1efcb5e07273a4f0093b12d1dad01 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 9 Nov 2017 02:19:39 -0500 Subject: [PATCH 0645/1291] intel_idle: Graceful probe failure when MWAIT is disabled commit a4c447533a18ee86e07232d6344ba12b1f9c5077 upstream. When MWAIT is disabled, intel_idle refuses to probe. But it may mis-lead the user by blaming this on the model number: intel_idle: does not run on family 6 modesl 79 So defer the check for MWAIT until after the model# white-list check succeeds, and if the MWAIT check fails, tell the user how to fix it: intel_idle: Please enable MWAIT in BIOS SETUP Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki Cc: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/idle/intel_idle.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index f0b06b14e782..16249b0953ff 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1061,7 +1061,7 @@ static const struct idle_cpu idle_cpu_dnv = { }; #define ICPU(model, cpu) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu } static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), @@ -1125,6 +1125,11 @@ static int __init intel_idle_probe(void) return -ENODEV; } + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_debug("Please enable MWAIT in BIOS SETUP\n"); + return -ENODEV; + } + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) return -ENODEV; -- GitLab From 27c41b170183d5d4ebbb6e4fdf41f035ef8617f8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 23 Mar 2018 10:22:53 -0700 Subject: [PATCH 0646/1291] xfs: catch inode allocation state mismatch corruption commit ee457001ed6c6f31ddad69c24c1da8f377d8472d upstream. We recently came across a V4 filesystem causing memory corruption due to a newly allocated inode being setup twice and being added to the superblock inode list twice. From code inspection, the only way this could happen is if a newly allocated inode was not marked as free on disk (i.e. di_mode wasn't zero). Running the metadump on an upstream debug kernel fails during inode allocation like so: XFS: Assertion failed: ip->i_d.di_nblocks == 0, file: fs/xfs/xfs_inod= e.c, line: 838 ------------[ cut here ]------------ kernel BUG at fs/xfs/xfs_message.c:114! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 11 PID: 3496 Comm: mkdir Not tainted 4.16.0-rc5-dgc #442 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/0= 1/2014 RIP: 0010:assfail+0x28/0x30 RSP: 0018:ffffc9000236fc80 EFLAGS: 00010202 RAX: 00000000ffffffea RBX: 0000000000004000 RCX: 0000000000000000 RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff8227211b RBP: ffffc9000236fce8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000bec R11: f000000000000000 R12: ffffc9000236fd30 R13: ffff8805c76bab80 R14: ffff8805c77ac800 R15: ffff88083fb12e10 FS: 00007fac8cbff040(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000= 000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffa6783ff8 CR3: 00000005c6e2b003 CR4: 00000000000606e0 Call Trace: xfs_ialloc+0x383/0x570 xfs_dir_ialloc+0x6a/0x2a0 xfs_create+0x412/0x670 xfs_generic_create+0x1f7/0x2c0 ? capable_wrt_inode_uidgid+0x3f/0x50 vfs_mkdir+0xfb/0x1b0 SyS_mkdir+0xcf/0xf0 do_syscall_64+0x73/0x1a0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Extracting the inode number we crashed on from an event trace and looking at it with xfs_db: xfs_db> inode 184452204 xfs_db> p core.magic = 0x494e core.mode = 0100644 core.version = 2 core.format = 2 (extents) core.nlinkv2 = 1 core.onlink = 0 ..... Confirms that it is not a free inode on disk. xfs_repair also trips over this inode: ..... zero length extent (off = 0, fsbno = 0) in ino 184452204 correcting nextents for inode 184452204 bad attribute fork in inode 184452204, would clear attr fork bad nblocks 1 for inode 184452204, would reset to 0 bad anextents 1 for inode 184452204, would reset to 0 imap claims in-use inode 184452204 is free, would correct imap would have cleared inode 184452204 ..... disconnected inode 184452204, would move to lost+found And so we have a situation where the directory structure and the inobt thinks the inode is free, but the inode on disk thinks it is still in use. Where this corruption came from is not possible to diagnose, but we can detect it and prevent the kernel from oopsing on lookup. The reproducer now results in: $ sudo mkdir /mnt/scratch/{0,1,2,3,4,5}{0,1,2,3,4,5} mkdir: cannot create directory =E2=80=98/mnt/scratch/00=E2=80=99: File ex= ists mkdir: cannot create directory =E2=80=98/mnt/scratch/01=E2=80=99: File ex= ists mkdir: cannot create directory =E2=80=98/mnt/scratch/03=E2=80=99: Structu= re needs cleaning mkdir: cannot create directory =E2=80=98/mnt/scratch/04=E2=80=99: Input/o= utput error mkdir: cannot create directory =E2=80=98/mnt/scratch/05=E2=80=99: Input/o= utput error .... And this corruption shutdown: [ 54.843517] XFS (loop0): Corruption detected! Free inode 0xafe846c not= marked free on disk [ 54.845885] XFS (loop0): Internal error xfs_trans_cancel at line 1023 = of file fs/xfs/xfs_trans.c. Caller xfs_create+0x425/0x670 [ 54.848994] CPU: 10 PID: 3541 Comm: mkdir Not tainted 4.16.0-rc5-dgc #= 443 [ 54.850753] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO= S 1.10.2-1 04/01/2014 [ 54.852859] Call Trace: [ 54.853531] dump_stack+0x85/0xc5 [ 54.854385] xfs_trans_cancel+0x197/0x1c0 [ 54.855421] xfs_create+0x425/0x670 [ 54.856314] xfs_generic_create+0x1f7/0x2c0 [ 54.857390] ? capable_wrt_inode_uidgid+0x3f/0x50 [ 54.858586] vfs_mkdir+0xfb/0x1b0 [ 54.859458] SyS_mkdir+0xcf/0xf0 [ 54.860254] do_syscall_64+0x73/0x1a0 [ 54.861193] entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ 54.862492] RIP: 0033:0x7fb73bddf547 [ 54.863358] RSP: 002b:00007ffdaa553338 EFLAGS: 00000246 ORIG_RAX: 0000= 000000000053 [ 54.865133] RAX: ffffffffffffffda RBX: 00007ffdaa55449a RCX: 00007fb73= bddf547 [ 54.866766] RDX: 0000000000000001 RSI: 00000000000001ff RDI: 00007ffda= a55449a [ 54.868432] RBP: 00007ffdaa55449a R08: 00000000000001ff R09: 00005623a= 8670dd0 [ 54.870110] R10: 00007fb73be72d5b R11: 0000000000000246 R12: 000000000= 00001ff [ 54.871752] R13: 00007ffdaa5534b0 R14: 0000000000000000 R15: 00007ffda= a553500 [ 54.873429] XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1= 024 of file fs/xfs/xfs_trans.c. Return address = ffffffff814cd050 [ 54.882790] XFS (loop0): Corruption of in-memory data detected. Shutt= ing down filesystem [ 54.884597] XFS (loop0): Please umount the filesystem and rectify the = problem(s) Note that this crash is only possible on v4 filesystemsi or v5 filesystems mounted with the ikeep mount option. For all other V5 filesystems, this problem cannot occur because we don't read inodes we are allocating from disk - we simply overwrite them with the new inode information. Signed-Off-By: Dave Chinner Reviewed-by: Carlos Maiolino Tested-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Cc: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..26f9e66a3ec9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -475,7 +475,28 @@ xfs_iget_cache_miss( trace_xfs_iget_miss(ip); - if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) { + + /* + * If we are allocating a new inode, then check what was returned is + * actually a free, empty inode. If we are not allocating an inode, + * the check we didn't find a free inode. + */ + if (flags & XFS_IGET_CREATE) { + if (VFS_I(ip)->i_mode != 0) { + xfs_warn(mp, +"Corruption detected! Free inode 0x%llx not marked free on disk", + ino); + error = -EFSCORRUPTED; + goto out_destroy; + } + if (ip->i_d.di_nblocks != 0) { + xfs_warn(mp, +"Corruption detected! Free inode 0x%llx has blocks allocated!", + ino); + error = -EFSCORRUPTED; + goto out_destroy; + } + } else if (VFS_I(ip)->i_mode == 0) { error = -ENOENT; goto out_destroy; } -- GitLab From 6f021e4ef39ace7f58c415856aef9308c70e89b9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 17 Apr 2018 17:17:34 -0700 Subject: [PATCH 0647/1291] xfs: validate cached inodes are free when allocated commit afca6c5b2595fc44383919fba740c194b0b76aff upstream. A recent fuzzed filesystem image cached random dcache corruption when the reproducer was run. This often showed up as panics in lookup_slow() on a null inode->i_ops pointer when doing pathwalks. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 .... Call Trace: lookup_slow+0x44/0x60 walk_component+0x3dd/0x9f0 link_path_walk+0x4a7/0x830 path_lookupat+0xc1/0x470 filename_lookup+0x129/0x270 user_path_at_empty+0x36/0x40 path_listxattr+0x98/0x110 SyS_listxattr+0x13/0x20 do_syscall_64+0xf5/0x280 entry_SYSCALL_64_after_hwframe+0x42/0xb7 but had many different failure modes including deadlocks trying to lock the inode that was just allocated or KASAN reports of use-after-free violations. The cause of the problem was a corrupt INOBT on a v4 fs where the root inode was marked as free in the inobt record. Hence when we allocated an inode, it chose the root inode to allocate, found it in the cache and re-initialised it. We recently fixed a similar inode allocation issue caused by inobt record corruption problem in xfs_iget_cache_miss() in commit ee457001ed6c ("xfs: catch inode allocation state mismatch corruption"). This change adds similar checks to the cache-hit path to catch it, and turns the reproducer into a corruption shutdown situation. Reported-by: Wen Xu Signed-Off-By: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong [darrick: fix typos in comment] Signed-off-by: Darrick J. Wong Cc: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 73 +++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 26f9e66a3ec9..544b5211221c 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -305,6 +305,46 @@ xfs_reinit_inode( return error; } +/* + * If we are allocating a new inode, then check what was returned is + * actually a free, empty inode. If we are not allocating an inode, + * then check we didn't find a free inode. + * + * Returns: + * 0 if the inode free state matches the lookup context + * -ENOENT if the inode is free and we are not allocating + * -EFSCORRUPTED if there is any state mismatch at all + */ +static int +xfs_iget_check_free_state( + struct xfs_inode *ip, + int flags) +{ + if (flags & XFS_IGET_CREATE) { + /* should be a free inode */ + if (VFS_I(ip)->i_mode != 0) { + xfs_warn(ip->i_mount, +"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", + ip->i_ino, VFS_I(ip)->i_mode); + return -EFSCORRUPTED; + } + + if (ip->i_d.di_nblocks != 0) { + xfs_warn(ip->i_mount, +"Corruption detected! Free inode 0x%llx has blocks allocated!", + ip->i_ino); + return -EFSCORRUPTED; + } + return 0; + } + + /* should be an allocated inode */ + if (VFS_I(ip)->i_mode == 0) + return -ENOENT; + + return 0; +} + /* * Check the validity of the inode we just found it the cache */ @@ -354,12 +394,12 @@ xfs_iget_cache_hit( } /* - * If lookup is racing with unlink return an error immediately. + * Check the inode free state is valid. This also detects lookup + * racing with unlinks. */ - if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) { - error = -ENOENT; + error = xfs_iget_check_free_state(ip, flags); + if (error) goto out_error; - } /* * If IRECLAIMABLE is set, we've torn down the VFS inode already. @@ -477,29 +517,12 @@ xfs_iget_cache_miss( /* - * If we are allocating a new inode, then check what was returned is - * actually a free, empty inode. If we are not allocating an inode, - * the check we didn't find a free inode. + * Check the inode free state is valid. This also detects lookup + * racing with unlinks. */ - if (flags & XFS_IGET_CREATE) { - if (VFS_I(ip)->i_mode != 0) { - xfs_warn(mp, -"Corruption detected! Free inode 0x%llx not marked free on disk", - ino); - error = -EFSCORRUPTED; - goto out_destroy; - } - if (ip->i_d.di_nblocks != 0) { - xfs_warn(mp, -"Corruption detected! Free inode 0x%llx has blocks allocated!", - ino); - error = -EFSCORRUPTED; - goto out_destroy; - } - } else if (VFS_I(ip)->i_mode == 0) { - error = -ENOENT; + error = xfs_iget_check_free_state(ip, flags); + if (error) goto out_destroy; - } /* * Preload the radix tree so we can insert safely under the -- GitLab From 59f35b983e8aeb98188c6ef93f8eabc594f8f953 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 8 Jun 2018 09:53:49 -0700 Subject: [PATCH 0648/1291] xfs: don't call xfs_da_shrink_inode with NULL bp commit bb3d48dcf86a97dc25fe9fc2c11938e19cb4399a upstream. xfs_attr3_leaf_create may have errored out before instantiating a buffer, for example if the blkno is out of range. In that case there is no work to do to remove it, and in fact xfs_da_shrink_inode will lead to an oops if we try. This also seems to fix a flaw where the original error from xfs_attr3_leaf_create gets overwritten in the cleanup case, and it removes a pointless assignment to bp which isn't used after this. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199969 Reported-by: Xu, Wen Tested-by: Xu, Wen Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Cc: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_leaf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 5c16db86b38f..40e53a4fc0a6 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -785,9 +785,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) ASSERT(blkno == 0); error = xfs_attr3_leaf_create(args, blkno, &bp); if (error) { - error = xfs_da_shrink_inode(args, 0, bp); - bp = NULL; - if (error) + /* xfs_attr3_leaf_create may not have instantiated a block */ + if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0)) goto out; xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ -- GitLab From 7d29fb53439c8c91874550cc078eda6db8feafe7 Mon Sep 17 00:00:00 2001 From: Shankara Pailoor Date: Tue, 5 Jun 2018 08:33:27 -0500 Subject: [PATCH 0649/1291] jfs: Fix inconsistency between memory allocation and ea_buf->max_size commit 92d34134193e5b129dc24f8d79cb9196626e8d7a upstream. The code is assuming the buffer is max_size length, but we weren't allocating enough space for it. Signed-off-by: Shankara Pailoor Signed-off-by: Dave Kleikamp Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/jfs/xattr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index c60f3d32ee91..a6797986b625 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (size > PSIZE) { /* * To keep the rest of the code simple. Allocate a - * contiguous buffer to work with + * contiguous buffer to work with. Make the buffer large + * enough to make use of the whole extent. */ - ea_buf->xattr = kmalloc(size, GFP_KERNEL); + ea_buf->max_size = (size + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); + + ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL); if (ea_buf->xattr == NULL) return -ENOMEM; ea_buf->flag = EA_MALLOC; - ea_buf->max_size = (size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); if (ea_size == 0) return 0; -- GitLab From 1aa1166efaceef8972045b4aa25e6e0ab96d8a30 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Aug 2018 12:16:40 +0200 Subject: [PATCH 0650/1291] Linux 4.14.62 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4bd65eabd298..d407ecfdee0b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 61 +SUBLEVEL = 62 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 9ffedb10183cb8f285356749205d584e7b2d57bb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 28 Jul 2018 11:47:17 +0200 Subject: [PATCH 0651/1291] parisc: Enable CONFIG_MLONGCALLS by default commit 66509a276c8c1d19ee3f661a41b418d101c57d29 upstream. Enable the -mlong-calls compiler option by default, because otherwise in most cases linking the vmlinux binary fails due to truncations of R_PARISC_PCREL22F relocations. This fixes building the 64-bit defconfig. Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 1fd3eb5b66c6..89e684fd795f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -201,7 +201,7 @@ config PREFETCH config MLONGCALLS bool "Enable the -mlong-calls compiler option for big kernels" - def_bool y if (!MODULES) + default y depends on PA8X00 help If you configure the kernel to include many drivers built-in instead -- GitLab From 2e56b37b1dd29de3312919c75f824fecada41634 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 5 Aug 2018 13:30:31 -0400 Subject: [PATCH 0652/1291] parisc: Define mb() and add memory barriers to assembler unlock sequences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fedb8da96355f5f64353625bf96dc69423ad1826 upstream. For years I thought all parisc machines executed loads and stores in order. However, Jeff Law recently indicated on gcc-patches that this is not correct. There are various degrees of out-of-order execution all the way back to the PA7xxx processor series (hit-under-miss). The PA8xxx series has full out-of-order execution for both integer operations, and loads and stores. This is described in the following article: http://web.archive.org/web/20040214092531/http://www.cpus.hp.com/technical_references/advperf.shtml For this reason, we need to define mb() and to insert a memory barrier before the store unlocking spinlocks. This ensures that all memory accesses are complete prior to unlocking. The ldcw instruction performs the same function on entry. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/barrier.h | 32 +++++++++++++++++++++++++++++++ arch/parisc/kernel/entry.S | 2 ++ arch/parisc/kernel/pacache.S | 1 + arch/parisc/kernel/syscall.S | 4 ++++ 4 files changed, 39 insertions(+) create mode 100644 arch/parisc/include/asm/barrier.h diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h new file mode 100644 index 000000000000..dbaaca84f27f --- /dev/null +++ b/arch/parisc/include/asm/barrier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifndef __ASSEMBLY__ + +/* The synchronize caches instruction executes as a nop on systems in + which all memory references are performed in order. */ +#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory") + +#if defined(CONFIG_SMP) +#define mb() do { synchronize_caches(); } while (0) +#define rmb() mb() +#define wmb() mb() +#define dma_rmb() mb() +#define dma_wmb() mb() +#else +#define mb() barrier() +#define rmb() barrier() +#define wmb() barrier() +#define dma_rmb() barrier() +#define dma_wmb() barrier() +#endif + +#define __smp_mb() mb() +#define __smp_rmb() mb() +#define __smp_wmb() mb() + +#include + +#endif /* !__ASSEMBLY__ */ +#endif /* __ASM_BARRIER_H */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index e95207c0565e..1b4732e20137 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -481,6 +481,8 @@ /* Release pa_tlb_lock lock without reloading lock address. */ .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + sync or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 67b0f7532e83..3e163df49cf3 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -354,6 +354,7 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_unlock la,flags,tmp #ifdef CONFIG_SMP ldi 1,\tmp + sync stw \tmp,0(\la) mtsm \flags #endif diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index e775f80ae28c..4886a6db42e9 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -633,6 +633,7 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw,ma %r24, 0(%r26) /* Free lock */ + sync stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ @@ -647,6 +648,7 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ + sync stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) @@ -848,6 +850,7 @@ cas2_action: cas2_end: /* Free lock */ + sync stw,ma %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 @@ -858,6 +861,7 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ + sync stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 -- GitLab From 1edd825c11f8ed2c409d6fb6b3d90a042cbf738d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:39 +0800 Subject: [PATCH 0653/1291] scsi: hpsa: fix selection of reply queue commit 8b834bff1b73dce46f4e9f5e84af6f73fed8b0ef upstream. Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") we could end up with an MSI-X vector that did not have any online CPUs mapped. This would lead to I/O hangs since there was no CPU to receive the completion. Retrieve IRQ affinity information using pci_irq_get_affinity() and use this mapping to choose a reply queue. [mkp: tweaked commit desc] Cc: Hannes Reinecke Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Laurence Oberman Cc: Meelis Roos Cc: Artem Bityutskiy Cc: Mike Snitzer Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Tested-by: Laurence Oberman Tested-by: Don Brace Tested-by: Artem Bityutskiy Acked-by: Don Brace Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 73 +++++++++++++++++++++++++++++++++------------ drivers/scsi/hpsa.h | 1 + 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 604a39dba5d0..5b4b7f9be2d7 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -1040,11 +1040,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c, c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); if (unlikely(!h->msix_vectors)) return; - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - c->Header.ReplyQueue = - raw_smp_processor_id() % h->nreply_queues; - else - c->Header.ReplyQueue = reply_queue % h->nreply_queues; + c->Header.ReplyQueue = reply_queue; } } @@ -1058,10 +1054,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h, * Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->ReplyQueue = smp_processor_id() % h->nreply_queues; - else - cp->ReplyQueue = reply_queue % h->nreply_queues; + cp->ReplyQueue = reply_queue; /* * Set the bits in the address sent down to include: * - performant mode bit (bit 0) @@ -1082,10 +1075,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h, /* Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->reply_queue = smp_processor_id() % h->nreply_queues; - else - cp->reply_queue = reply_queue % h->nreply_queues; + cp->reply_queue = reply_queue; /* Set the bits in the address sent down to include: * - performant mode bit not used in ioaccel mode 2 * - pull count (bits 0-3) @@ -1104,10 +1094,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h, * Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->reply_queue = smp_processor_id() % h->nreply_queues; - else - cp->reply_queue = reply_queue % h->nreply_queues; + cp->reply_queue = reply_queue; /* * Set the bits in the address sent down to include: * - performant mode bit not used in ioaccel mode 2 @@ -1152,6 +1139,8 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h, { dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); + + reply_queue = h->reply_map[raw_smp_processor_id()]; switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -7244,6 +7233,26 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } +static void hpsa_setup_reply_map(struct ctlr_info *h) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < h->msix_vectors; queue++) { + mask = pci_irq_get_affinity(h->pdev, queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + h->reply_map[cpu] = queue; + } + return; + +fallback: + for_each_possible_cpu(cpu) + h->reply_map[cpu] = 0; +} + /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7639,6 +7648,10 @@ static int hpsa_pci_init(struct ctlr_info *h) err = hpsa_interrupt_mode(h); if (err) goto clean1; + + /* setup mapping between CPU and reply queue */ + hpsa_setup_reply_map(h); + err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8284,6 +8297,28 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, return wq; } +static void hpda_free_ctlr_info(struct ctlr_info *h) +{ + kfree(h->reply_map); + kfree(h); +} + +static struct ctlr_info *hpda_alloc_ctlr_info(void) +{ + struct ctlr_info *h; + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return NULL; + + h->reply_map = kzalloc(sizeof(*h->reply_map) * nr_cpu_ids, GFP_KERNEL); + if (!h->reply_map) { + kfree(h); + return NULL; + } + return h; +} + static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int dac, rc; @@ -8321,7 +8356,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * the driver. See comments in hpsa.h for more info. */ BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT); - h = kzalloc(sizeof(*h), GFP_KERNEL); + h = hpda_alloc_ctlr_info(); if (!h) { dev_err(&pdev->dev, "Failed to allocate controller head\n"); return -ENOMEM; @@ -8726,7 +8761,7 @@ static void hpsa_remove_one(struct pci_dev *pdev) h->lockup_detected = NULL; /* init_one 2 */ /* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */ - kfree(h); /* init_one 1 */ + hpda_free_ctlr_info(h); /* init_one 1 */ } static int hpsa_suspend(__attribute__((unused)) struct pci_dev *pdev, diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 018f980a701c..fb9f5e7f8209 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -158,6 +158,7 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { + unsigned int *reply_map; int ctlr; char devname[8]; char *product_name; -- GitLab From 3ad2e6f4f6bf91d75a63ee1f3ff83fcca6a47f64 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:41 +0800 Subject: [PATCH 0654/1291] scsi: core: introduce force_blk_mq commit 2f31115e940c4afd49b99c33123534e2ac924ffb upstream. This patch introduces 'force_blk_mq' to the scsi_host_template so that drivers that have no desire to support the legacy I/O path can signal blk-mq only support. [mkp: commit desc] Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Mike Snitzer Cc: Laurence Oberman Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hosts.c | 1 + include/scsi/scsi_host.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index dd9464920456..ef22b275d050 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -474,6 +474,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->dma_boundary = 0xffffffff; shost->use_blk_mq = scsi_use_blk_mq; + shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq; device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a8b7bf879ced..9c1e4bad6581 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -452,6 +452,9 @@ struct scsi_host_template { /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; + /* True if the low-level driver supports blk-mq only */ + unsigned force_blk_mq:1; + /* * Countdown for host blocking with no commands outstanding. */ -- GitLab From 70b522f163bbb32e120b9fff6977f507abb44f9a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:42 +0800 Subject: [PATCH 0655/1291] scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity commit b5b6e8c8d3b4cbeb447a0f10c7d5de3caa573299 upstream. Since commit 84676c1f21e8ff5 ("genirq/affinity: assign vectors to all possible CPUs") it is possible to end up in a scenario where only offline CPUs are mapped to an interrupt vector. This is only an issue for the legacy I/O path since with blk-mq/scsi-mq an I/O can't be submitted to a hardware queue if the queue isn't mapped to an online CPU. Fix this issue by forcing virtio-scsi to use blk-mq. [mkp: commit desc] Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Paolo Bonzini Cc: Mike Snitzer Cc: Laurence Oberman Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Acked-by: Paolo Bonzini Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/virtio_scsi.c | 59 ++------------------------------------ 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 7c28e8d4955a..54e3a0f6844c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -91,9 +91,6 @@ struct virtio_scsi_vq { struct virtio_scsi_target_state { seqcount_t tgt_seq; - /* Count of outstanding requests. */ - atomic_t reqs; - /* Currently active virtqueue for requests sent to this target. */ struct virtio_scsi_vq *req_vq; }; @@ -152,8 +149,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd; - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; dev_dbg(&sc->device->sdev_gendev, "cmd %p response %u status %#02x sense_len %u\n", @@ -210,8 +205,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) } sc->scsi_done(sc); - - atomic_dec(&tgt->reqs); } static void virtscsi_vq_done(struct virtio_scsi *vscsi, @@ -580,10 +573,7 @@ static int virtscsi_queuecommand_single(struct Scsi_Host *sh, struct scsi_cmnd *sc) { struct virtio_scsi *vscsi = shost_priv(sh); - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; - atomic_inc(&tgt->reqs); return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc); } @@ -596,55 +586,11 @@ static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi, return &vscsi->req_vqs[hwq]; } -static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi, - struct virtio_scsi_target_state *tgt) -{ - struct virtio_scsi_vq *vq; - unsigned long flags; - u32 queue_num; - - local_irq_save(flags); - if (atomic_inc_return(&tgt->reqs) > 1) { - unsigned long seq; - - do { - seq = read_seqcount_begin(&tgt->tgt_seq); - vq = tgt->req_vq; - } while (read_seqcount_retry(&tgt->tgt_seq, seq)); - } else { - /* no writes can be concurrent because of atomic_t */ - write_seqcount_begin(&tgt->tgt_seq); - - /* keep previous req_vq if a reader just arrived */ - if (unlikely(atomic_read(&tgt->reqs) > 1)) { - vq = tgt->req_vq; - goto unlock; - } - - queue_num = smp_processor_id(); - while (unlikely(queue_num >= vscsi->num_queues)) - queue_num -= vscsi->num_queues; - tgt->req_vq = vq = &vscsi->req_vqs[queue_num]; - unlock: - write_seqcount_end(&tgt->tgt_seq); - } - local_irq_restore(flags); - - return vq; -} - static int virtscsi_queuecommand_multi(struct Scsi_Host *sh, struct scsi_cmnd *sc) { struct virtio_scsi *vscsi = shost_priv(sh); - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; - struct virtio_scsi_vq *req_vq; - - if (shost_use_blk_mq(sh)) - req_vq = virtscsi_pick_vq_mq(vscsi, sc); - else - req_vq = virtscsi_pick_vq(vscsi, tgt); + struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc); return virtscsi_queuecommand(vscsi, req_vq, sc); } @@ -775,7 +721,6 @@ static int virtscsi_target_alloc(struct scsi_target *starget) return -ENOMEM; seqcount_init(&tgt->tgt_seq); - atomic_set(&tgt->reqs, 0); tgt->req_vq = &vscsi->req_vqs[0]; starget->hostdata = tgt; @@ -823,6 +768,7 @@ static struct scsi_host_template virtscsi_host_template_single = { .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, .track_queue_depth = 1, + .force_blk_mq = 1, }; static struct scsi_host_template virtscsi_host_template_multi = { @@ -844,6 +790,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .target_destroy = virtscsi_target_destroy, .map_queues = virtscsi_map_queues, .track_queue_depth = 1, + .force_blk_mq = 1, }; #define virtscsi_config_get(vdev, fld) \ -- GitLab From d8b880fcd5a3b75e3d14b7f3fa021a5708f38dec Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 20 Apr 2018 14:55:52 -0700 Subject: [PATCH 0656/1291] kasan: add no_sanitize attribute for clang builds commit 12c8f25a016dff69ee284aa3338bebfd2cfcba33 upstream. KASAN uses the __no_sanitize_address macro to disable instrumentation of particular functions. Right now it's defined only for GCC build, which causes false positives when clang is used. This patch adds a definition for clang. Note, that clang's revision 329612 or higher is required. [andreyknvl@google.com: remove redundant #ifdef CONFIG_KASAN check] Link: http://lkml.kernel.org/r/c79aa31a2a2790f6131ed607c58b0dd45dd62a6c.1523967959.git.andreyknvl@google.com Link: http://lkml.kernel.org/r/4ad725cc903f8534f8c8a60f0daade5e3d674f8d.1523554166.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: David Rientjes Cc: Thomas Gleixner Cc: Ingo Molnar Cc: David Woodhouse Cc: Andrey Konovalov Cc: Will Deacon Cc: Greg Kroah-Hartman Cc: Paul Lawrence Cc: Sandipan Das Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Sodagudi Prasad Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-clang.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 070f85d92c15..28b76f0894d4 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -17,6 +17,9 @@ */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) +#undef __no_sanitize_address +#define __no_sanitize_address __attribute__((no_sanitize("address"))) + /* Clang doesn't have a way to turn it off per-function, yet. */ #ifdef __noretpoline #undef __noretpoline -- GitLab From 3eb86ff32eb54c4345b723ae8dd03bd7487d35bd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Jan 2018 11:51:04 -0800 Subject: [PATCH 0657/1291] Mark HI and TASKLET softirq synchronous commit 3c53776e29f81719efcf8f7a6e30cdf753bee94d upstream. Way back in 4.9, we committed 4cd13c21b207 ("softirq: Let ksoftirqd do its job"), and ever since we've had small nagging issues with it. For example, we've had: 1ff688209e2e ("watchdog: core: make sure the watchdog_worker is not deferred") 8d5755b3f77b ("watchdog: softdog: fire watchdog even if softirqs do not get to run") 217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()") all of which worked around some of the effects of that commit. The DVB people have also complained that the commit causes excessive USB URB latencies, which seems to be due to the USB code using tasklets to schedule USB traffic. This seems to be an issue mainly when already living on the edge, but waiting for ksoftirqd to handle it really does seem to cause excessive latencies. Now Hanna Hawa reports that this issue isn't just limited to USB URB and DVB, but also causes timeout problems for the Marvell SoC team: "I'm facing kernel panic issue while running raid 5 on sata disks connected to Macchiatobin (Marvell community board with Armada-8040 SoC with 4 ARMv8 cores of CA72) Raid 5 built with Marvell DMA engine and async_tx mechanism (ASYNC_TX_DMA [=y]); the DMA driver (mv_xor_v2) uses a tasklet to clean the done descriptors from the queue" The latency problem causes a panic: mv_xor_v2 f0400000.xor: dma_sync_wait: timeout! Kernel panic - not syncing: async_tx_quiesce: DMA error waiting for transaction We've discussed simply just reverting the original commit entirely, and also much more involved solutions (with per-softirq threads etc). This patch is intentionally stupid and fairly limited, because the issue still remains, and the other solutions either got sidetracked or had other issues. We should probably also consider the timer softirqs to be synchronous and not be delayed to ksoftirqd (since they were the issue with the earlier watchdog problems), but that should be done as a separate patch. This does only the tasklet cases. Reported-and-tested-by: Hanna Hawa Reported-and-tested-by: Josef Griebichler Reported-by: Mauro Carvalho Chehab Cc: Alan Stern Cc: Greg Kroah-Hartman Cc: Eric Dumazet Cc: Ingo Molnar Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/softirq.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index f40ac7191257..a4c87cf27f9d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -79,12 +79,16 @@ static void wakeup_softirqd(void) /* * If ksoftirqd is scheduled, we do not want to process pending softirqs - * right now. Let ksoftirqd handle this at its own rate, to get fairness. + * right now. Let ksoftirqd handle this at its own rate, to get fairness, + * unless we're doing some of the synchronous softirqs. */ -static bool ksoftirqd_running(void) +#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) +static bool ksoftirqd_running(unsigned long pending) { struct task_struct *tsk = __this_cpu_read(ksoftirqd); + if (pending & SOFTIRQ_NOW_MASK) + return false; return tsk && (tsk->state == TASK_RUNNING); } @@ -324,7 +328,7 @@ asmlinkage __visible void do_softirq(void) pending = local_softirq_pending(); - if (pending && !ksoftirqd_running()) + if (pending && !ksoftirqd_running(pending)) do_softirq_own_stack(); local_irq_restore(flags); @@ -351,7 +355,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (ksoftirqd_running()) + if (ksoftirqd_running(local_softirq_pending())) return; if (!force_irqthreads) { -- GitLab From b6f194b64b11e897ba084778d626ce6f57a0563a Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 17 Jul 2018 12:35:29 -0700 Subject: [PATCH 0658/1291] stop_machine: Disable preemption after queueing stopper threads commit 2610e88946632afb78aa58e61f11368ac4c0af7b upstream. This commit: 9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads") does not fully address the race condition that can occur as follows: On one CPU, call it CPU 3, thread 1 invokes cpu_stop_queue_two_works(2, 3,...), and the execution is such that thread 1 queues the works for migration/2 and migration/3, and is preempted after releasing the locks for migration/2 and migration/3, but before waking the threads. Then, On CPU 2, a kworker, call it thread 2, is running, and it invokes cpu_stop_queue_two_works(1, 2,...), such that thread 2 queues the works for migration/1 and migration/2. Meanwhile, on CPU 3, thread 1 resumes execution, and wakes migration/2 and migration/3. This means that when CPU 2 releases the locks for migration/1 and migration/2, but before it wakes those threads, it can be preempted by migration/2. If thread 2 is preempted by migration/2, then migration/2 will execute the first work item successfully, since migration/3 was woken up by CPU 3, but when it goes to execute the second work item, it disables preemption, calls multi_cpu_stop(), and thus, CPU 2 will wait forever for migration/1, which should have been woken up by thread 2. However migration/1 cannot be woken up by thread 2, since it is a kworker, so it is affine to CPU 2, but CPU 2 is running migration/2 with preemption disabled, so thread 2 will never run. Disable preemption after queueing works for stopper threads to ensure that the operation of queueing the works and waking the stopper threads is atomic. Co-Developed-by: Prasad Sodagudi Co-Developed-by: Pavankumar Kondeti Signed-off-by: Isaac J. Manjarres Signed-off-by: Prasad Sodagudi Signed-off-by: Pavankumar Kondeti Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bigeasy@linutronix.de Cc: gregkh@linuxfoundation.org Cc: matt@codeblueprint.co.uk Fixes: 9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads") Link: http://lkml.kernel.org/r/1531856129-9871-1-git-send-email-isaacm@codeaurora.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/stop_machine.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 1ff523dae6e2..e190d1ef3a23 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -260,6 +260,15 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, err = 0; __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); + /* + * The waking up of stopper threads has to happen + * in the same scheduling context as the queueing. + * Otherwise, there is a possibility of one of the + * above stoppers being woken up by another CPU, + * and preempting us. This will cause us to n ot + * wake up the other stopper forever. + */ + preempt_disable(); unlock: raw_spin_unlock(&stopper2->lock); raw_spin_unlock_irq(&stopper1->lock); @@ -271,7 +280,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, } if (!err) { - preempt_disable(); wake_up_q(&wakeq); preempt_enable(); } -- GitLab From 6e32aa7149c1f3ae04fbfa6ec7ea01940a479ce5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Aug 2018 16:42:16 +0200 Subject: [PATCH 0659/1291] xen/netfront: don't cache skb_shinfo() commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream. skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache its return value. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index dfc076f9ee4b..d5e790dd589a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -894,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, struct sk_buff *skb, struct sk_buff_head *list) { - struct skb_shared_info *shinfo = skb_shinfo(skb); RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *nskb; @@ -903,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, RING_GET_RESPONSE(&queue->rx, ++cons); skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; - if (shinfo->nr_frags == MAX_SKB_FRAGS) { + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; BUG_ON(pull_to <= skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } - BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); + BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); - skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + skb_frag_page(nfrag), rx->offset, rx->status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; -- GitLab From 71b7ca5795008efe082742473bd606b85bc65fd6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:44:42 -0700 Subject: [PATCH 0660/1291] scsi: sr: Avoid that opening a CD-ROM hangs with runtime power management enabled commit 1214fd7b497400d200e3f4e64e2338b303a20949 upstream. Surround scsi_execute() calls with scsi_autopm_get_device() and scsi_autopm_put_device(). Note: removing sr_mutex protection from the scsi_cd_get() and scsi_cd_put() calls is safe because the purpose of sr_mutex is to serialize cdrom_*() calls. This patch avoids that complaints similar to the following appear in the kernel log if runtime power management is enabled: INFO: task systemd-udevd:650 blocked for more than 120 seconds. Not tainted 4.18.0-rc7-dbg+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. systemd-udevd D28176 650 513 0x00000104 Call Trace: __schedule+0x444/0xfe0 schedule+0x4e/0xe0 schedule_preempt_disabled+0x18/0x30 __mutex_lock+0x41c/0xc70 mutex_lock_nested+0x1b/0x20 __blkdev_get+0x106/0x970 blkdev_get+0x22c/0x5a0 blkdev_open+0xe9/0x100 do_dentry_open.isra.19+0x33e/0x570 vfs_open+0x7c/0xd0 path_openat+0x6e3/0x1120 do_filp_open+0x11c/0x1c0 do_sys_open+0x208/0x2d0 __x64_sys_openat+0x59/0x70 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Bart Van Assche Cc: Maurizio Lombardi Cc: Johannes Thumshirn Cc: Alan Stern Cc: Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sr.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 3f3cb72e0c0c..d0389b20574d 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -523,18 +523,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt) static int sr_block_open(struct block_device *bdev, fmode_t mode) { struct scsi_cd *cd; + struct scsi_device *sdev; int ret = -ENXIO; + cd = scsi_cd_get(bdev->bd_disk); + if (!cd) + goto out; + + sdev = cd->device; + scsi_autopm_get_device(sdev); check_disk_change(bdev); mutex_lock(&sr_mutex); - cd = scsi_cd_get(bdev->bd_disk); - if (cd) { - ret = cdrom_open(&cd->cdi, bdev, mode); - if (ret) - scsi_cd_put(cd); - } + ret = cdrom_open(&cd->cdi, bdev, mode); mutex_unlock(&sr_mutex); + + scsi_autopm_put_device(sdev); + if (ret) + scsi_cd_put(cd); + +out: return ret; } @@ -562,6 +570,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (ret) goto out; + scsi_autopm_get_device(sdev); + /* * Send SCSI addressing ioctls directly to mid level, send other * ioctls to cdrom/block level. @@ -570,15 +580,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case SCSI_IOCTL_GET_IDLUN: case SCSI_IOCTL_GET_BUS_NUMBER: ret = scsi_ioctl(sdev, cmd, argp); - goto out; + goto put; } ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); if (ret != -ENOSYS) - goto out; + goto put; ret = scsi_ioctl(sdev, cmd, argp); +put: + scsi_autopm_put_device(sdev); + out: mutex_unlock(&sr_mutex); return ret; -- GitLab From fa085d77929bf3267876f79816a67b82d515826b Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 26 Jul 2018 16:34:44 -0700 Subject: [PATCH 0661/1291] scsi: qla2xxx: Fix memory leak for allocating abort IOCB commit 5e53be8e476a3397ed5383c23376f299555a2b43 upstream. In the case of IOCB QFull, Initiator code can leave behind a stale pointer to an SRB structure on the outstanding command array. Fixes: 82de802ad46e ("scsi: qla2xxx: Preparation for Target MQ.") Cc: stable@vger.kernel.org #v4.16+ Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 53 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 63bea6a65d51..8d579bf0fc81 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2128,34 +2128,11 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) req_cnt = 1; handle = 0; - if (!sp) - goto skip_cmd_array; - - /* Check for room in outstanding command list. */ - handle = req->current_outstanding_cmd; - for (index = 1; index < req->num_outstanding_cmds; index++) { - handle++; - if (handle == req->num_outstanding_cmds) - handle = 1; - if (!req->outstanding_cmds[handle]) - break; - } - if (index == req->num_outstanding_cmds) { - ql_log(ql_log_warn, vha, 0x700b, - "No room on outstanding cmd array.\n"); - goto queuing_error; - } - - /* Prep command array. */ - req->current_outstanding_cmd = handle; - req->outstanding_cmds[handle] = sp; - sp->handle = handle; - - /* Adjust entry-counts as needed. */ - if (sp->type != SRB_SCSI_CMD) + if (sp && (sp->type != SRB_SCSI_CMD)) { + /* Adjust entry-counts as needed. */ req_cnt = sp->iocbs; + } -skip_cmd_array: /* Check for room on request queue. */ if (req->cnt < req_cnt + 2) { if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha)) @@ -2179,6 +2156,28 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) if (req->cnt < req_cnt + 2) goto queuing_error; + if (sp) { + /* Check for room in outstanding command list. */ + handle = req->current_outstanding_cmd; + for (index = 1; index < req->num_outstanding_cmds; index++) { + handle++; + if (handle == req->num_outstanding_cmds) + handle = 1; + if (!req->outstanding_cmds[handle]) + break; + } + if (index == req->num_outstanding_cmds) { + ql_log(ql_log_warn, vha, 0x700b, + "No room on outstanding cmd array.\n"); + goto queuing_error; + } + + /* Prep command array. */ + req->current_outstanding_cmd = handle; + req->outstanding_cmds[handle] = sp; + sp->handle = handle; + } + /* Prep packet */ req->cnt -= req_cnt; pkt = req->ring_ptr; @@ -2191,6 +2190,8 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) pkt->handle = handle; } + return pkt; + queuing_error: qpair->tgt_counters.num_alloc_iocb_failed++; return pkt; -- GitLab From b7722f4ac3533d48dc5996a4f7e5d847934179b0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Aug 2018 12:19:42 -0700 Subject: [PATCH 0662/1291] init: rename and re-order boot_cpu_state_init() commit b5b1404d0815894de0690de8a1ab58269e56eae6 upstream. This is purely a preparatory patch for upcoming changes during the 4.19 merge window. We have a function called "boot_cpu_state_init()" that isn't really about the bootup cpu state: that is done much earlier by the similarly named "boot_cpu_init()" (note lack of "state" in name). This function initializes some hotplug CPU state, and needs to run after the percpu data has been properly initialized. It even has a comment to that effect. Except it _doesn't_ actually run after the percpu data has been properly initialized. On x86 it happens to do that, but on at least arm and arm64, the percpu base pointers are initialized by the arch-specific 'smp_prepare_boot_cpu()' hook, which ran _after_ boot_cpu_state_init(). This had some unexpected results, and in particular we have a patch pending for the merge window that did the obvious cleanup of using 'this_cpu_write()' in the cpu hotplug init code: - per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; + this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); which is obviously the right thing to do. Except because of the ordering issue, it actually failed miserably and unexpectedly on arm64. So this just fixes the ordering, and changes the name of the function to be 'boot_cpu_hotplug_init()' to make it obvious that it's about cpu hotplug state, because the core CPU state was supposed to have already been done earlier. Marked for stable, since the (not yet merged) patch that will show this problem is marked for stable. Reported-by: Vlastimil Babka Reported-by: Mian Yousaf Kaukab Suggested-by: Catalin Marinas Acked-by: Thomas Gleixner Cc: Will Deacon Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 2 +- init/main.c | 2 +- kernel/cpu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9546bf2fe310..dc491b3cf4a6 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,7 +30,7 @@ struct cpu { }; extern void boot_cpu_init(void); -extern void boot_cpu_state_init(void); +extern void boot_cpu_hotplug_init(void); extern void cpu_init(void); extern void trap_init(void); diff --git a/init/main.c b/init/main.c index 0d88f37febcb..c4a45145e102 100644 --- a/init/main.c +++ b/init/main.c @@ -543,8 +543,8 @@ asmlinkage __visible void __init start_kernel(void) setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); - boot_cpu_state_init(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + boot_cpu_hotplug_init(); build_all_zonelists(NULL); page_alloc_init(); diff --git a/kernel/cpu.c b/kernel/cpu.c index f21bfa3172d8..d6affe6bd6b6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2022,7 +2022,7 @@ void __init boot_cpu_init(void) /* * Must be called _AFTER_ setting up the per_cpu areas */ -void __init boot_cpu_state_init(void) +void __init boot_cpu_hotplug_init(void) { per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; } -- GitLab From abfc0ec698fc8e4407069e0c2ef79c4f20680ee4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Aug 2018 09:03:58 -0400 Subject: [PATCH 0663/1291] root dentries need RCU-delayed freeing commit 90bad5e05bcdb0308cfa3d3a60f5c0b9c8e2efb3 upstream. Since mountpoint crossing can happen without leaving lazy mode, root dentries do need the same protection against having their memory freed without RCU delay as everything else in the tree. It's partially hidden by RCU delay between detaching from the mount tree and dropping the vfsmount reference, but the starting point of pathwalk can be on an already detached mount, in which case umount-caused RCU delay has already passed by the time the lazy pathwalk grabs rcu_read_lock(). If the starting point happens to be at the root of that vfsmount *and* that vfsmount covers the entire filesystem, we get trouble. Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 5f31a93150d1..ea7d40cb7053 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1922,10 +1922,12 @@ struct dentry *d_make_root(struct inode *root_inode) if (root_inode) { res = __d_alloc(root_inode->i_sb, NULL); - if (res) + if (res) { + res->d_flags |= DCACHE_RCUACCESS; d_instantiate(res, root_inode); - else + } else { iput(root_inode); + } } return res; } -- GitLab From d5426a384144e8e28730a22ccc7bcb71d22304a7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Aug 2018 10:15:54 -0400 Subject: [PATCH 0664/1291] make sure that __dentry_kill() always invalidates d_seq, unhashed or not commit 4c0d7cd5c8416b1ef41534d19163cb07ffaa03ab upstream. RCU pathwalk relies upon the assumption that anything that changes ->d_inode of a dentry will invalidate its ->d_seq. That's almost true - the one exception is that the final dput() of already unhashed dentry does *not* touch ->d_seq at all. Unhashing does, though, so for anything we'd found by RCU dcache lookup we are fine. Unfortunately, we can *start* with an unhashed dentry or jump into it. We could try and be careful in the (few) places where that could happen. Or we could just make the final dput() invalidate the damn thing, unhashed or not. The latter is much simpler and easier to backport, so let's do it that way. Reported-by: "Dae R. Jeong" Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index ea7d40cb7053..8d4935978fec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -357,14 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - bool hashed = !d_unhashed(dentry); - if (hashed) - raw_write_seqcount_begin(&dentry->d_seq); + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - if (hashed) - raw_write_seqcount_end(&dentry->d_seq); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) -- GitLab From d9d46a226dd84073d2b109f8a587f40ccfcc616c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Aug 2018 17:21:17 -0400 Subject: [PATCH 0665/1291] fix mntput/mntput race commit 9ea0a46ca2c318fcc449c1e6b62a7230a17888f1 upstream. mntput_no_expire() does the calculation of total refcount under mount_lock; unfortunately, the decrement (as well as all increments) are done outside of it, leading to false positives in the "are we dropping the last reference" test. Consider the following situation: * mnt is a lazy-umounted mount, kept alive by two opened files. One of those files gets closed. Total refcount of mnt is 2. On CPU 42 mntput(mnt) (called from __fput()) drops one reference, decrementing component * After it has looked at component #0, the process on CPU 0 does mntget(), incrementing component #0, gets preempted and gets to run again - on CPU 69. There it does mntput(), which drops the reference (component #69) and proceeds to spin on mount_lock. * On CPU 42 our first mntput() finishes counting. It observes the decrement of component #69, but not the increment of component #0. As the result, the total it gets is not 1 as it should've been - it's 0. At which point we decide that vfsmount needs to be killed and proceed to free it and shut the filesystem down. However, there's still another opened file on that filesystem, with reference to (now freed) vfsmount, etc. and we are screwed. It's not a wide race, but it can be reproduced with artificial slowdown of the mnt_get_count() loop, and it should be easier to hit on SMP KVM setups. Fix consists of moving the refcount decrement under mount_lock; the tricky part is that we want (and can) keep the fast case (i.e. mount that still has non-NULL ->mnt_ns) entirely out of mount_lock. All places that zero mnt->mnt_ns are dropping some reference to mnt and they call synchronize_rcu() before that mntput(). IOW, if mntput() observes (under rcu_read_lock()) a non-NULL ->mnt_ns, it is guaranteed that there is another reference yet to be dropped. Reported-by: Jann Horn Tested-by: Jann Horn Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1eb3bfd8be5a..bed19bd21c1f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1195,12 +1195,22 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); -- GitLab From e5751c84402f62c8d5b30af96b04180c5c0ad48d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Aug 2018 17:51:32 -0400 Subject: [PATCH 0666/1291] fix __legitimize_mnt()/mntput() race commit 119e1ef80ecfe0d1deb6378d4ab41f5b71519de1 upstream. __legitimize_mnt() has two problems - one is that in case of success the check of mount_lock is not ordered wrt preceding increment of refcount, making it possible to have successful __legitimize_mnt() on one CPU just before the otherwise final mntpu() on another, with __legitimize_mnt() not seeing mntput() taking the lock and mntput() not seeing the increment done by __legitimize_mnt(). Solved by a pair of barriers. Another is that failure of __legitimize_mnt() on the second read_seqretry() leaves us with reference that'll need to be dropped by caller; however, if that races with final mntput() we can end up with caller dropping rcu_read_lock() and doing mntput() to release that reference - with the first mntput() having freed the damn thing just as rcu_read_lock() had been dropped. Solution: in "do mntput() yourself" failure case grab mount_lock, check if MNT_DOOMED has been set by racing final mntput() that has missed our increment and if it has - undo the increment and treat that as "failure, caller doesn't need to drop anything" case. It's not easy to hit - the final mntput() has to come right after the first read_seqretry() in __legitimize_mnt() *and* manage to miss the increment done by __legitimize_mnt() before the second read_seqretry() in there. The things that are almost impossible to hit on bare hardware are not impossible on SMP KVM, though... Reported-by: Oleg Nesterov Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index bed19bd21c1f..9dc146e7b5e0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1210,6 +1219,11 @@ static void mntput_no_expire(struct mount *mnt) return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); -- GitLab From 2424869e4cd1d2d08b0c23b709b116d48d1a42aa Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 5 Jan 2018 18:02:55 -0200 Subject: [PATCH 0667/1291] mtd: nand: qcom: Add a NULL check for devm_kasprintf() commit 069f05346d01e7298939f16533953cdf52370be3 upstream. devm_kasprintf() may fail, so we should better add a NULL check and propagate an error on failure. Signed-off-by: Fabio Estevam Signed-off-by: Boris Brezillon Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/qcom_nandc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 3baddfc997d1..b49ca02b399d 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -2544,6 +2544,9 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc, nand_set_flash_node(chip, dn); mtd->name = devm_kasprintf(dev, GFP_KERNEL, "qcom_nand.%d", host->cs); + if (!mtd->name) + return -ENOMEM; + mtd->owner = THIS_MODULE; mtd->dev.parent = dev; -- GitLab From 4290940dcb07f8876b294099005e803aab576e8d Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 7 Dec 2017 19:53:34 +0800 Subject: [PATCH 0668/1291] phy: phy-mtk-tphy: use auto instead of force to bypass utmi signals commit 00c0092c5f62147b7d85f0c6f1cf245a0a1ff3b6 upstream. When system is running, if usb2 phy is forced to bypass utmi signals, all PLL will be turned off, and it can't detect device connection anymore, so replace force mode with auto mode which can bypass utmi signals automatically if no device attached for normal flow. But keep the force mode to fix RX sensitivity degradation issue. Signed-off-by: Chunfeng Yun Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/phy/mediatek/phy-mtk-tphy.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 721a2a1c97ef..a63bba12aee4 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -438,9 +438,9 @@ static void u2_phy_instance_init(struct mtk_tphy *tphy, u32 index = instance->index; u32 tmp; - /* switch to USB function. (system register, force ip into usb mode) */ + /* switch to USB function, and enable usb pll */ tmp = readl(com + U3P_U2PHYDTM0); - tmp &= ~P2C_FORCE_UART_EN; + tmp &= ~(P2C_FORCE_UART_EN | P2C_FORCE_SUSPENDM); tmp |= P2C_RG_XCVRSEL_VAL(1) | P2C_RG_DATAIN_VAL(0); writel(tmp, com + U3P_U2PHYDTM0); @@ -500,10 +500,8 @@ static void u2_phy_instance_power_on(struct mtk_tphy *tphy, u32 index = instance->index; u32 tmp; - /* (force_suspendm=0) (let suspendm=1, enable usb 480MHz pll) */ tmp = readl(com + U3P_U2PHYDTM0); - tmp &= ~(P2C_FORCE_SUSPENDM | P2C_RG_XCVRSEL); - tmp &= ~(P2C_RG_DATAIN | P2C_DTM0_PART_MASK); + tmp &= ~(P2C_RG_XCVRSEL | P2C_RG_DATAIN | P2C_DTM0_PART_MASK); writel(tmp, com + U3P_U2PHYDTM0); /* OTG Enable */ @@ -538,7 +536,6 @@ static void u2_phy_instance_power_off(struct mtk_tphy *tphy, tmp = readl(com + U3P_U2PHYDTM0); tmp &= ~(P2C_RG_XCVRSEL | P2C_RG_DATAIN); - tmp |= P2C_FORCE_SUSPENDM; writel(tmp, com + U3P_U2PHYDTM0); /* OTG Disable */ @@ -546,18 +543,16 @@ static void u2_phy_instance_power_off(struct mtk_tphy *tphy, tmp &= ~PA6_RG_U2_OTG_VBUSCMP_EN; writel(tmp, com + U3P_USBPHYACR6); - /* let suspendm=0, set utmi into analog power down */ - tmp = readl(com + U3P_U2PHYDTM0); - tmp &= ~P2C_RG_SUSPENDM; - writel(tmp, com + U3P_U2PHYDTM0); - udelay(1); - tmp = readl(com + U3P_U2PHYDTM1); tmp &= ~(P2C_RG_VBUSVALID | P2C_RG_AVALID); tmp |= P2C_RG_SESSEND; writel(tmp, com + U3P_U2PHYDTM1); if (tphy->pdata->avoid_rx_sen_degradation && index) { + tmp = readl(com + U3P_U2PHYDTM0); + tmp &= ~(P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM); + writel(tmp, com + U3P_U2PHYDTM0); + tmp = readl(com + U3D_U2PHYDCR0); tmp &= ~P2C_RG_SIF_U2PLL_FORCE_ON; writel(tmp, com + U3D_U2PHYDCR0); -- GitLab From f6ec33f6bd3723a8146768106434ef6ab3d9d990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Wed, 25 Oct 2017 22:14:53 -0700 Subject: [PATCH 0669/1291] Bluetooth: hci_ldisc: Allow sleeping while proto locks are held. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 67d2f8781b9f00d1089aafcfa3dc09fcd0f343e2 upstream. Commit dec2c92880cc5435381d50e3045ef018a762a917 ("Bluetooth: hci_ldisc: Use rwlocking to avoid closing proto races") introduced locks in hci_ldisc that are held while calling the proto functions. These locks are rwlock's, and hence do not allow sleeping while they are held. However, the proto functions that hci_bcm registers use mutexes and hence need to be able to sleep. In more detail: hci_uart_tty_receive() and hci_uart_dequeue() both acquire the rwlock, after which they call proto->recv() and proto->dequeue(), respectively. In the case of hci_bcm these point to bcm_recv() and bcm_dequeue(). The latter both acquire the bcm_device_lock, which is a mutex, so doing so results in a call to might_sleep(). But since we're holding a rwlock in hci_ldisc, that results in the following BUG (this for the dequeue case - a similar one for the receive case is omitted for brevity): BUG: sleeping function called from invalid context at kernel/locking/mutex.c in_atomic(): 1, irqs_disabled(): 0, pid: 7303, name: kworker/7:3 INFO: lockdep is turned off. CPU: 7 PID: 7303 Comm: kworker/7:3 Tainted: G W OE 4.13.2+ #17 Hardware name: Apple Inc. MacBookPro13,3/Mac-A5C67F76ED83108C, BIOS MBP133.8 Workqueue: events hci_uart_write_work [hci_uart] Call Trace: dump_stack+0x8e/0xd6 ___might_sleep+0x164/0x250 __might_sleep+0x4a/0x80 __mutex_lock+0x59/0xa00 ? lock_acquire+0xa3/0x1f0 ? lock_acquire+0xa3/0x1f0 ? hci_uart_write_work+0xd3/0x160 [hci_uart] mutex_lock_nested+0x1b/0x20 ? mutex_lock_nested+0x1b/0x20 bcm_dequeue+0x21/0xc0 [hci_uart] hci_uart_write_work+0xe6/0x160 [hci_uart] process_one_work+0x253/0x6a0 worker_thread+0x4d/0x3b0 kthread+0x133/0x150 We can't replace the mutex in hci_bcm, because there are other calls there that might sleep. Therefore this replaces the rwlock's in hci_ldisc with rw_semaphore's (which allow sleeping). This is a safer approach anyway as it reduces the restrictions on the proto callbacks. Also, because acquiring write-lock is very rare compared to acquiring the read-lock, the percpu variant of rw_semaphore is used. Lastly, because hci_uart_tx_wakeup() may be called from an IRQ context, we can't block (sleep) while trying acquire the read lock there, so we use the trylock variant. Signed-off-by: Ronald Tschalär Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 38 ++++++++++++++++++++--------------- drivers/bluetooth/hci_uart.h | 2 +- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 6aef3bde10d7..c823914b3a80 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -115,12 +115,12 @@ static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) struct sk_buff *skb = hu->tx_skb; if (!skb) { - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) skb = hu->proto->dequeue(hu); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); } else { hu->tx_skb = NULL; } @@ -130,7 +130,14 @@ static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) int hci_uart_tx_wakeup(struct hci_uart *hu) { - read_lock(&hu->proto_lock); + /* This may be called in an IRQ context, so we can't sleep. Therefore + * we try to acquire the lock only, and if that fails we assume the + * tty is being closed because that is the only time the write lock is + * acquired. If, however, at some point in the future the write lock + * is also acquired in other situations, then this must be revisited. + */ + if (!percpu_down_read_trylock(&hu->proto_lock)) + return 0; if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; @@ -145,7 +152,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu) schedule_work(&hu->write_work); no_schedule: - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return 0; } @@ -247,12 +254,12 @@ static int hci_uart_flush(struct hci_dev *hdev) tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) hu->proto->flush(hu); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return 0; } @@ -275,15 +282,15 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb), skb->len); - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return -EUNATCH; } hu->proto->enqueue(hu, skb); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); hci_uart_tx_wakeup(hu); @@ -486,7 +493,7 @@ static int hci_uart_tty_open(struct tty_struct *tty) INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - rwlock_init(&hu->proto_lock); + percpu_init_rwsem(&hu->proto_lock); /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); @@ -503,7 +510,6 @@ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; - unsigned long flags; BT_DBG("tty %p", tty); @@ -518,9 +524,9 @@ static void hci_uart_tty_close(struct tty_struct *tty) hci_uart_close(hdev); if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - write_lock_irqsave(&hu->proto_lock, flags); + percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); - write_unlock_irqrestore(&hu->proto_lock, flags); + percpu_up_write(&hu->proto_lock); cancel_work_sync(&hu->write_work); @@ -582,10 +588,10 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, if (!hu || tty != hu->tty) return; - read_lock(&hu->proto_lock); + percpu_down_read(&hu->proto_lock); if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) { - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); return; } @@ -593,7 +599,7 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * tty caller */ hu->proto->recv(hu, data, count); - read_unlock(&hu->proto_lock); + percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index d9cd95d81149..66e8c68e4607 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -87,7 +87,7 @@ struct hci_uart { struct work_struct write_work; const struct hci_uart_proto *proto; - rwlock_t proto_lock; /* Stop work for proto close */ + struct percpu_rw_semaphore proto_lock; /* Stop work for proto close */ void *priv; struct sk_buff *tx_skb; -- GitLab From 4a53c4e84ace1bc75157a7281af3fe8f5b19d08c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 17 Nov 2017 00:54:53 +0100 Subject: [PATCH 0670/1291] Bluetooth: hci_serdev: Init hci_uart proto_lock to avoid oops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d73e172816652772114827abaa2dbc053eecbbd7 upstream. John Stultz reports a boot time crash with the HiKey board (which uses hci_serdev) occurring in hci_uart_tx_wakeup(). That function is contained in hci_ldisc.c, but also called from the newer hci_serdev.c. It acquires the proto_lock in struct hci_uart and it turns out that we forgot to init the lock in the serdev code path, thus causing the crash. John bisected the crash to commit 67d2f8781b9f ("Bluetooth: hci_ldisc: Allow sleeping while proto locks are held"), but the issue was present before and the commit merely exposed it. (Perhaps by luck, the crash did not occur with rwlocks.) Init the proto_lock in the serdev code path to avoid the oops. Stack trace for posterity: Unable to handle kernel read from unreadable memory at 406f127000 [000000406f127000] user address but active_mm is swapper Internal error: Oops: 96000005 [#1] PREEMPT SMP Hardware name: HiKey Development Board (DT) Call trace: hci_uart_tx_wakeup+0x38/0x148 hci_uart_send_frame+0x28/0x38 hci_send_frame+0x64/0xc0 hci_cmd_work+0x98/0x110 process_one_work+0x134/0x330 worker_thread+0x130/0x468 kthread+0xf8/0x128 ret_from_fork+0x10/0x18 Link: https://lkml.org/lkml/2017/11/15/908 Reported-and-tested-by: John Stultz Cc: Ronald Tschalär Cc: Rob Herring Cc: Sumit Semwal Signed-off-by: Lukas Wunner Signed-off-by: Marcel Holtmann Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_serdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index b725ac4f7ff6..52e6d4d1608e 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -304,6 +304,7 @@ int hci_uart_register_device(struct hci_uart *hu, hci_set_drvdata(hdev, hu); INIT_WORK(&hu->write_work, hci_uart_write_work); + percpu_init_rwsem(&hu->proto_lock); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. This avoids filling in the -- GitLab From 93bf5456d85a1e86c11516a97317a15785c9a0db Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 15 Jun 2018 09:41:29 +0200 Subject: [PATCH 0671/1291] ARM: dts: imx6sx: fix irq for pcie bridge commit 1bcfe0564044be578841744faea1c2f46adc8178 upstream. Use the correct IRQ line for the MSI controller in the PCIe host controller. Apparently a different IRQ line is used compared to other i.MX6 variants. Without this change MSI IRQs aren't properly propagated to the upstream interrupt controller. Signed-off-by: Oleksij Rempel Reviewed-by: Lucas Stach Fixes: b1d17f68e5c5 ("ARM: dts: imx: add initial imx6sx device tree source") Signed-off-by: Shawn Guo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 6c7eb54be9e2..d64438bfa68b 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -1305,7 +1305,7 @@ pcie: pcie@8ffc000 { 0x82000000 0 0x08000000 0x08000000 0 0x00f00000>; bus-range = <0x00 0xff>; num-lanes = <1>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6SX_CLK_PCIE_REF_125M>, <&clks IMX6SX_CLK_PCIE_AXI>, <&clks IMX6SX_CLK_LVDS1_OUT>, -- GitLab From f7a0871d8141c3ac60123fbc4923e5e3c121b094 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Aug 2018 16:41:39 +0200 Subject: [PATCH 0672/1291] x86/paravirt: Fix spectre-v2 mitigations for paravirt guests commit 5800dc5c19f34e6e03b5adab1282535cb102fafd upstream. Nadav reported that on guests we're failing to rewrite the indirect calls to CALLEE_SAVE paravirt functions. In particular the pv_queued_spin_unlock() call is left unpatched and that is all over the place. This obviously wrecks Spectre-v2 mitigation (for paravirt guests) which relies on not actually having indirect calls around. The reason is an incorrect clobber test in paravirt_patch_call(); this function rewrites an indirect call with a direct call to the _SAME_ function, there is no possible way the clobbers can be different because of this. Therefore remove this clobber check. Also put WARNs on the other patch failure case (not enough room for the instruction) which I've not seen trigger in my (limited) testing. Three live kernel image disassemblies for lock_sock_nested (as a small function that illustrates the problem nicely). PRE is the current situation for guests, POST is with this patch applied and NATIVE is with or without the patch for !guests. PRE: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: callq *0xffffffff822299e8 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063ae0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. POST: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: callq 0xffffffff810a0c20 <__raw_callee_save___pv_queued_spin_unlock> 0xffffffff817be9a5 <+53>: xchg %ax,%ax 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063aa0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. NATIVE: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: movb $0x0,(%rdi) 0xffffffff817be9a3 <+51>: nopl 0x0(%rax) 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063ae0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. Fixes: 63f70270ccd9 ("[PATCH] i386: PARAVIRT: add common patching machinery") Fixes: 3010a0663fd9 ("x86/paravirt, objtool: Annotate indirect calls") Reported-by: Nadav Amit Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/paravirt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e1df9ef5d78c..f3559b84cd75 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -88,10 +88,12 @@ unsigned paravirt_patch_call(void *insnbuf, struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); - if (tgt_clobbers & ~site_clobbers) - return len; /* target would clobber too much for this site */ - if (len < 5) + if (len < 5) { +#ifdef CONFIG_RETPOLINE + WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); +#endif return len; /* call too long for patch site */ + } b->opcode = 0xe8; /* call */ b->delta = delta; @@ -106,8 +108,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); - if (len < 5) + if (len < 5) { +#ifdef CONFIG_RETPOLINE + WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); +#endif return len; /* call too long for patch site */ + } b->opcode = 0xe9; /* jmp */ b->delta = delta; -- GitLab From f374b5593e44c01265156b4c4070b618097f401b Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 26 Jul 2018 13:14:55 +0200 Subject: [PATCH 0673/1291] x86/speculation: Protect against userspace-userspace spectreRSB commit fdf82a7856b32d905c39afc85e34364491e46346 upstream. The article "Spectre Returns! Speculation Attacks using the Return Stack Buffer" [1] describes two new (sub-)variants of spectrev2-like attacks, making use solely of the RSB contents even on CPUs that don't fallback to BTB on RSB underflow (Skylake+). Mitigate userspace-userspace attacks by always unconditionally filling RSB on context switch when the generic spectrev2 mitigation has been enabled. [1] https://arxiv.org/pdf/1807.07940.pdf Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Borislav Petkov Cc: David Woodhouse Cc: Peter Zijlstra Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1807261308190.997@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7416fc206b4a..1d3bbaa4d768 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -311,23 +311,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -/* Check for Skylake-like CPUs (for RSB handling) */ -static bool __init is_skylake_era(void) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - return true; - } - } - return false; -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -388,22 +371,15 @@ static void __init spectre_v2_select_mitigation(void) pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP nor PTI are available, there is a risk of - * hitting userspace addresses in the RSB after a context switch - * from a shallow call stack to a deeper one. To prevent this fill - * the entire RSB, even when using IBRS. + * If spectre v2 protection has been enabled, unconditionally fill + * RSB during a context switch; this protects against two independent + * issues: * - * Skylake era CPUs have a separate issue with *underflow* of the - * RSB, when they will predict 'ret' targets from the generic BTB. - * The proper mitigation for this is IBRS. If IBRS is not supported - * or deactivated in favour of retpolines the RSB fill on context - * switch is required. + * - RSB underflow (and switch to BTB) on Skylake+ + * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs */ - if ((!boot_cpu_has(X86_FEATURE_PTI) && - !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); - } + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { -- GitLab From bf6ac84a1bd1d250833159c3e3e97fd75e03acaf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:37:03 +0900 Subject: [PATCH 0674/1291] kprobes/x86: Fix %p uses in error messages commit 0ea063306eecf300fcf06d2f5917474b580f666f upstream. Remove all %p uses in error messages in kprobes/x86. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491902310.9916.13355297638917767319.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f1030c522e06..1e7709337827 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -394,8 +394,6 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) - (u8 *) dest; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); - pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, dest, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -621,8 +619,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, * Raise a BUG or we'll continue in an endless reentering loop * and eventually a stack overflow. */ - printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", - p->addr); + pr_err("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); default: -- GitLab From 688e5a2087c3decacb2245af8b0b053c81e1a99e Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 3 Aug 2018 10:05:50 -0700 Subject: [PATCH 0675/1291] x86/irqflags: Provide a declaration for native_save_fl commit 208cbb32558907f68b3b2a081ca2337ac3744794 upstream. It was reported that the commit d0a8d9378d16 is causing users of gcc < 4.9 to observe -Werror=missing-prototypes errors. Indeed, it seems that: extern inline unsigned long native_save_fl(void) { return 0; } compiled with -Werror=missing-prototypes produces this warning in gcc < 4.9, but not gcc >= 4.9. Fixes: d0a8d9378d16 ("x86/paravirt: Make native_save_fl() extern inline"). Reported-by: David Laight Reported-by: Jean Delvare Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: jgross@suse.com Cc: kstewart@linuxfoundation.org Cc: gregkh@linuxfoundation.org Cc: boris.ostrovsky@oracle.com Cc: astrachan@google.com Cc: mka@chromium.org Cc: arnd@arndb.de Cc: tstellar@redhat.com Cc: sedat.dilek@gmail.com Cc: David.Laight@aculab.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180803170550.164688-1-ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c4fc17220df9..c14f2a74b2be 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,6 +13,8 @@ * Interrupt control: */ +/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ +extern inline unsigned long native_save_fl(void); extern inline unsigned long native_save_fl(void) { unsigned long flags; -- GitLab From aefe1861bc156102ac5d5be18cf781a76537c119 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:21 -0700 Subject: [PATCH 0676/1291] x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU speculates on PTE entries which do not have the PRESENT bit set, if the content of the resulting physical address is available in the L1D cache. The OS side mitigation makes sure that a !PRESENT PTE entry points to a physical address outside the actually existing and cachable memory space. This is achieved by inverting the upper bits of the PTE. Due to the address space limitations this only works for 64bit and 32bit PAE kernels, but not for 32bit non PAE. This mitigation applies to both host and guest kernels, but in case of a 64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of the PAE address space (44bit) is not enough if the host has more than 43 bits of populated memory address space, because the speculation treats the PTE content as a physical host address bypassing EPT. The host (hypervisor) protects itself against the guest by flushing L1D as needed, but pages inside the guest are not protected against attacks from other processes inside the same guest. For the guest the inverted PTE mask has to match the host to provide the full protection for all pages the host could possibly map into the guest. The hosts populated address space is not known to the guest, so the mask must cover the possible maximal host address space, i.e. 52 bit. On 32bit PAE the maximum PTE mask is currently set to 44 bit because that is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits the mask to be below what the host could possible use for physical pages. The L1TF PROT_NONE protection code uses the PTE masks to determine which bits to invert to make sure the higher bits are set for unmapped entries to prevent L1TF speculation attacks against EPT inside guests. In order to invert all bits that could be used by the host, increase __PHYSICAL_PAGE_SHIFT to 52 to match 64bit. The real limit for a 32bit PAE kernel is still 44 bits because all Linux PTEs are created from unsigned long PFNs, so they cannot be higher than 44 bits on a 32bit kernel. So these extra PFN bits should be never set. The only users of this macro are using it to look at PTEs, so it's safe. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_32_types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index aa30c3241ea7..0d5c739eebd7 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -29,8 +29,13 @@ #define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE -/* 44=32+12, the limit we can fit into an unsigned long pfn */ -#define __PHYSICAL_MASK_SHIFT 44 +/* + * This is beyond the 44 bit limit imposed by the 32bit long pfns, + * but we need the full mask to make sure inverted PROT_NONE + * entries have all the host bits set in a guest. + * The real limit is still 44 bits. + */ +#define __PHYSICAL_MASK_SHIFT 52 #define __VIRTUAL_MASK_SHIFT 32 #else /* !CONFIG_X86_PAE */ -- GitLab From 39991a7aa8d527164a87f90bb18b07b2699ed7d0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 Jun 2018 15:48:22 -0700 Subject: [PATCH 0677/1291] x86/speculation/l1tf: Change order of offset/type in swap entry commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream If pages are swapped out, the swap entry is stored in the corresponding PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate on PTE entries which have the present bit set and would treat the swap entry as phsyical address (PFN). To mitigate that the upper bits of the PTE must be set so the PTE points to non existent memory. The swap entry stores the type and the offset of a swapped out page in the PTE. type is stored in bit 9-13 and offset in bit 14-63. The hardware ignores the bits beyond the phsyical address space limit, so to make the mitigation effective its required to start 'offset' at the lowest possible bit so that even large swap offsets do not reach into the physical address space limit bits. Move offset to bit 9-58 and type to bit 59-63 which are the bits that hardware generally doesn't care about. That, in turn, means that if you on desktop chip with only 40 bits of physical addressing, now that the offset starts at bit 9, there needs to be 30 bits of offset actually *in use* until bit 39 ends up being set, which means when inverted it will again point into existing memory. So that's 4 terabyte of swap space (because the offset is counted in pages, so 30 bits of offset is 42 bits of actual coverage). With bigger physical addressing, that obviously grows further, until the limit of the offset is hit (at 50 bits of offset - 62 bits of actual swap file coverage). This is a preparatory change for the actual swap entry inversion to protect against L1TF. [ AK: Updated description and minor tweaks. Split into two parts ] [ tglx: Massaged changelog ] Signed-off-by: Linus Torvalds Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1149d2112b2e..24e2f7aad03d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -276,7 +276,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -290,19 +290,28 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. */ -#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) -#define SWP_TYPE_BITS 5 -/* Place the offset above the type: */ -#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS) +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ - & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) -#define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (SWP_TYPE_FIRST_BIT)) \ - | ((offset) << SWP_OFFSET_FIRST_BIT) }) +/* Extract the high bits for type */ +#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) + +/* Shift up (to get rid of type), then down to get value */ +#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) + +/* + * Shift the offset up "too far" by TYPE bits, then down again + */ +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) + #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -- GitLab From 83ef7e8c0cb72510588ee8b96f5cf30c1ecd9270 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 Jun 2018 15:48:23 -0700 Subject: [PATCH 0678/1291] x86/speculation/l1tf: Protect swap entries against L1TF commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream With L1 terminal fault the CPU speculates into unmapped PTEs, and resulting side effects allow to read the memory the PTE is pointing too, if its values are still in the L1 cache. For swapped out pages Linux uses unmapped PTEs and stores a swap entry into them. To protect against L1TF it must be ensured that the swap entry is not pointing to valid memory, which requires setting higher bits (between bit 36 and bit 45) that are inside the CPUs physical address space, but outside any real memory. To do this invert the offset to make sure the higher bits are always set, as long as the swap file is not too big. Note there is no workaround for 32bit !PAE, or on systems which have more than MAX_PA/2 worth of memory. The later case is very unlikely to happen on real systems. [AK: updated description and minor tweaks by. Split out from the original patch ] Signed-off-by: Linus Torvalds Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 24e2f7aad03d..1b0bdf8ae0d6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -276,7 +276,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -289,6 +289,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. + * + * The offset is inverted by a binary not operation to make the high + * physical bits set. */ #define SWP_TYPE_BITS 5 @@ -303,13 +306,15 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) /* Shift up (to get rid of type), then down to get value */ -#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) /* * Shift the offset up "too far" by TYPE bits, then down again + * The offset is inverted by a binary not operation to make the high + * physical bits set. */ #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) -- GitLab From 8c35b2fcbe6a86be93aa7cb9c4842e3c70b77620 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:24 -0700 Subject: [PATCH 0679/1291] x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream When PTEs are set to PROT_NONE the kernel just clears the Present bit and preserves the PFN, which creates attack surface for L1TF speculation speculation attacks. This is important inside guests, because L1TF speculation bypasses physical page remapping. While the host has its own migitations preventing leaking data from other VMs into the guest, this would still risk leaking the wrong page inside the current guest. This uses the same technique as Linus' swap entry patch: while an entry is is in PROTNONE state invert the complete PFN part part of it. This ensures that the the highest bit will point to non existing memory. The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and pte/pmd/pud_pfn undo it. This assume that no code path touches the PFN part of a PTE directly without using these primitives. This doesn't handle the case that MMIO is on the top of the CPU physical memory. If such an MMIO region was exposed by an unpriviledged driver for mmap it would be possible to attack some real memory. However this situation is all rather unlikely. For 32bit non PAE the inversion is not done because there are really not enough bits to protect anything. Q: Why does the guest need to be protected when the HyperVisor already has L1TF mitigations? A: Here's an example: Physical pages 1 2 get mapped into a guest as GPA 1 -> PA 2 GPA 2 -> PA 1 through EPT. The L1TF speculation ignores the EPT remapping. Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and they belong to different users and should be isolated. A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and gets read access to the underlying physical page. Which in this case points to PA 2, so it can read process B's data, if it happened to be in L1, so isolation inside the guest is broken. There's nothing the hypervisor can do about this. This mitigation has to be done in the guest itself. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-2level.h | 17 +++++++++++ arch/x86/include/asm/pgtable-3level.h | 2 ++ arch/x86/include/asm/pgtable-invert.h | 32 +++++++++++++++++++ arch/x86/include/asm/pgtable.h | 44 +++++++++++++++++++-------- arch/x86/include/asm/pgtable_64.h | 2 ++ 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 arch/x86/include/asm/pgtable-invert.h diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 685ffe8a0eaf..60d0f9015317 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -95,4 +95,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* No inverted PFNs on 2 level page tables */ + +static inline u64 protnone_mask(u64 val) +{ + return 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + return val; +} + +static inline bool __pte_needs_invert(u64 val) +{ + return false; +} + #endif /* _ASM_X86_PGTABLE_2LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index bc4af5453802..1ef2be4e9b69 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -260,4 +260,6 @@ static inline pte_t gup_get_pte(pte_t *ptep) return pte; } +#include + #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h new file mode 100644 index 000000000000..177564187fc0 --- /dev/null +++ b/arch/x86/include/asm/pgtable-invert.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PGTABLE_INVERT_H +#define _ASM_PGTABLE_INVERT_H 1 + +#ifndef __ASSEMBLY__ + +static inline bool __pte_needs_invert(u64 val) +{ + return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE; +} + +/* Get a mask to xor with the page table entry to get the correct pfn. */ +static inline u64 protnone_mask(u64 val) +{ + return __pte_needs_invert(val) ? ~0ull : 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + /* + * When a PTE transitions from NONE to !NONE or vice-versa + * invert the PFN part to stop speculation. + * pte_pfn undoes this when needed. + */ + if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) + val = (val & ~mask) | (~val & mask); + return val; +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5c790e93657d..0d1a23e7c83b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -185,19 +185,29 @@ static inline int pte_special(pte_t pte) return pte_flags(pte) & _PAGE_SPECIAL; } +/* Entries that were set to PROT_NONE are inverted */ + +static inline u64 protnone_mask(u64 val); + static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = pte_val(pte); + pfn ^= protnone_mask(pfn); + return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; + unsigned long pfn = pmd_val(pmd); + pfn ^= protnone_mask(pfn); + return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT; + unsigned long pfn = pud_val(pud); + pfn ^= protnone_mask(pfn); + return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) @@ -528,25 +538,33 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PTE_PFN_MASK; + return __pte(pfn | massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PMD_PAGE_MASK; + return __pmd(pfn | massage_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { - return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PUD_PAGE_MASK; + return __pud(pfn | massage_pgprot(pgprot)); } +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pteval_t val = pte_val(pte); + pteval_t val = pte_val(pte), oldval = val; /* * Chop off the NX bit (if present), and add the NX portion of @@ -554,17 +572,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ val &= _PAGE_CHG_MASK; val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; - + val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); return __pte(val); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmdval_t val = pmd_val(pmd); + pmdval_t val = pmd_val(pmd), oldval = val; val &= _HPAGE_CHG_MASK; val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; - + val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); return __pmd(val); } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1b0bdf8ae0d6..4ecb72831938 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -360,5 +360,7 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages, return true; } +#include + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ -- GitLab From aff6fe17f52815e6ebdbf82b69f3edf669808eb6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:25 -0700 Subject: [PATCH 0680/1291] x86/speculation/l1tf: Make sure the first page is always reserved commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream The L1TF workaround doesn't make any attempt to mitigate speculate accesses to the first physical page for zeroed PTEs. Normally it only contains some data from the early real mode BIOS. It's not entirely clear that the first page is reserved in all configurations, so add an extra reservation call to make sure it is really reserved. In most configurations (e.g. with the standard reservations) it's likely a nop. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index efbcf5283520..dcb00acb6583 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -852,6 +852,12 @@ void __init setup_arch(char **cmdline_p) memblock_reserve(__pa_symbol(_text), (unsigned long)__bss_stop - (unsigned long)_text); + /* + * Make sure page 0 is always reserved because on systems with + * L1TF its contents can be leaked to user processes. + */ + memblock_reserve(0, PAGE_SIZE); + early_reserve_initrd(); /* -- GitLab From 3d98de691c013ea4e60360db93b885fe9db15c37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:26 -0700 Subject: [PATCH 0681/1291] x86/speculation/l1tf: Add sysfs reporting for l1tf commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream L1TF core kernel workarounds are cheap and normally always enabled, However they still should be reported in sysfs if the system is vulnerable or mitigated. Add the necessary CPU feature/bug bits. - Extend the existing checks for Meltdowns to determine if the system is vulnerable. All CPUs which are not vulnerable to Meltdown are also not vulnerable to L1TF - Check for 32bit non PAE and emit a warning as there is no practical way for mitigation due to the limited physical address bits - If the system has more than MAX_PA/2 physical memory the invert page workarounds don't protect the system against the L1TF attack anymore, because an inverted physical address will also point to valid memory. Print a warning in this case and report that the system is vulnerable. Add a function which returns the PFN limit for the L1TF mitigation, which will be used in follow up patches for sanity and range checks. [ tglx: Renamed the CPU feature bit to L1TF_PTEINV ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/processor.h | 5 ++++ arch/x86/kernel/cpu/bugs.c | 40 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 20 +++++++++++++++ drivers/base/cpu.c | 8 ++++++ include/linux/cpu.h | 2 ++ 6 files changed, 77 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 403e97d5e243..30c23f539418 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -219,6 +219,7 @@ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -370,5 +371,6 @@ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3222c7746cb1..267a8da7c694 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -180,6 +180,11 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); +static inline unsigned long l1tf_pfn_limit(void) +{ + return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; +} + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1d3bbaa4d768..9f991fc8d545 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,9 +27,11 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); +static void __init l1tf_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -81,6 +83,8 @@ void __init check_bugs(void) */ ssb_select_mitigation(); + l1tf_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -205,6 +209,32 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -657,6 +687,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + case X86_BUG_L1TF: + if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) + return sprintf(buf, "Mitigation: Page Table Inversion\n"); + break; + default: break; } @@ -683,4 +718,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * { return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 48e98964ecad..b7cee3424bcd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -933,6 +933,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { + /* in addition to cpu_no_speculation */ + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; @@ -958,6 +973,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + if (x86_match_cpu(cpu_no_l1tf)) + return; + + setup_force_cpu_bug(X86_BUG_L1TF); } /* diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 433f14bcab15..93758b528d8f 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -527,16 +527,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); +static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, + &dev_attr_l1tf.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dc491b3cf4a6..2ce5a5b36786 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -55,6 +55,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- GitLab From a4116334be3921b0309a9bfa6f808c4663e45f3c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:27 -0700 Subject: [PATCH 0682/1291] x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings commit 42e4089c7890725fcd329999252dc489b72f2921 upstream For L1TF PROT_NONE mappings are protected by inverting the PFN in the page table entry. This sets the high bits in the CPU's address space, thus making sure to point to not point an unmapped entry to valid cached memory. Some server system BIOSes put the MMIO mappings high up in the physical address space. If such an high mapping was mapped to unprivileged users they could attack low memory by setting such a mapping to PROT_NONE. This could happen through a special device driver which is not access protected. Normal /dev/mem is of course access protected. To avoid this forbid PROT_NONE mappings or mprotect for high MMIO mappings. Valid page mappings are allowed because the system is then unsafe anyways. It's not expected that users commonly use PROT_NONE on MMIO. But to minimize any impact this is only enforced if the mapping actually refers to a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip the check for root. For mmaps this is straight forward and can be handled in vm_insert_pfn and in remap_pfn_range(). For mprotect it's a bit trickier. At the point where the actual PTEs are accessed a lot of state has been changed and it would be difficult to undo on an error. Since this is a uncommon case use a separate early page talk walk pass for MMIO PROT_NONE mappings that checks for this condition early. For non MMIO and non PROT_NONE there are no changes. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 8 ++++++ arch/x86/mm/mmap.c | 21 +++++++++++++++ include/asm-generic/pgtable.h | 12 +++++++++ mm/memory.c | 37 ++++++++++++++++++------- mm/mprotect.c | 49 ++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0d1a23e7c83b..a242560af32f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1292,6 +1292,14 @@ static inline bool pud_access_permitted(pud_t pud, bool write) return __pte_access_permitted(pud_val(pud), write); } +#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 +extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); + +static inline bool arch_has_pfn_modify_check(void) +{ + return boot_cpu_has_bug(X86_BUG_L1TF); +} + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a99679826846..5f4805d69aab 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -174,3 +174,24 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } + +/* + * Only allow root to set high MMIO mappings to PROT_NONE. + * This prevents an unpriv. user to set them to PROT_NONE and invert + * them, then pointing to valid memory for L1TF speculation. + * + * Note: for locked down kernels may want to disable the root override. + */ +bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) +{ + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return true; + if (!__pte_needs_invert(pgprot_val(prot))) + return true; + /* If it's real memory always allow */ + if (pfn_valid(pfn)) + return true; + if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) + return false; + return true; +} diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2142bceaeb75..f50d253f213f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1069,4 +1069,16 @@ static inline void init_espfix_bsp(void) { } #endif #endif +#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED +static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) +{ + return true; +} + +static inline bool arch_has_pfn_modify_check(void) +{ + return false; +} +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */ diff --git a/mm/memory.c b/mm/memory.c index fc7779165dcf..5539b1975091 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1887,6 +1887,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; + if (!pfn_modify_allowed(pfn, pgprot)) + return -EACCES; + track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, @@ -1908,6 +1911,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, track_pfn_insert(vma, &pgprot, pfn); + if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) + return -EACCES; + /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* @@ -1955,6 +1961,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) @@ -1962,12 +1969,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); + if (!pfn_modify_allowed(pfn, prot)) { + err = -EACCES; + break; + } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -1976,6 +1987,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, { pmd_t *pmd; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); @@ -1984,9 +1996,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); - if (remap_pte_range(mm, pmd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pte_range(mm, pmd, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pmd++, addr = next, addr != end); return 0; } @@ -1997,6 +2010,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, { pud_t *pud; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, p4d, addr); @@ -2004,9 +2018,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, return -ENOMEM; do { next = pud_addr_end(addr, end); - if (remap_pmd_range(mm, pud, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pmd_range(mm, pud, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pud++, addr = next, addr != end); return 0; } @@ -2017,6 +2032,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, { p4d_t *p4d; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; p4d = p4d_alloc(mm, pgd, addr); @@ -2024,9 +2040,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, return -ENOMEM; do { next = p4d_addr_end(addr, end); - if (remap_pud_range(mm, p4d, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (p4d++, addr = next, addr != end); return 0; } diff --git a/mm/mprotect.c b/mm/mprotect.c index 58b629bb70de..60864e19421e 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -292,6 +292,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, return pages; } +static int prot_none_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? + 0 : -EACCES; +} + +static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long next, + struct mm_walk *walk) +{ + return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? + 0 : -EACCES; +} + +static int prot_none_test(unsigned long addr, unsigned long next, + struct mm_walk *walk) +{ + return 0; +} + +static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags) +{ + pgprot_t new_pgprot = vm_get_page_prot(newflags); + struct mm_walk prot_none_walk = { + .pte_entry = prot_none_pte_entry, + .hugetlb_entry = prot_none_hugetlb_entry, + .test_walk = prot_none_test, + .mm = current->mm, + .private = &new_pgprot, + }; + + return walk_page_range(start, end, &prot_none_walk); +} + int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags) @@ -309,6 +345,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, return 0; } + /* + * Do PROT_NONE PFN permission checks here when we can still + * bail out without undoing a lot of state. This is a rather + * uncommon case, so doesn't need to be very optimized. + */ + if (arch_has_pfn_modify_check() && + (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && + (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + error = prot_none_walk(vma, start, end, newflags); + if (error) + return error; + } + /* * If we make a private mapping writable we increase our commit; * but (without finer accounting) cannot reduce our commit if we -- GitLab From 4bb1a8d8f832084df867351e2b8240e957cac2ec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:28 -0700 Subject: [PATCH 0683/1291] x86/speculation/l1tf: Limit swap file size to MAX_PA/2 commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream For the L1TF workaround its necessary to limit the swap file size to below MAX_PA/2, so that the higher bits of the swap offset inverted never point to valid memory. Add a mechanism for the architecture to override the swap file size check in swapfile.c and add a x86 specific max swapfile check function that enforces that limit. The check is only enabled if the CPU is vulnerable to L1TF. In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system with 46bit PA it is 32TB. The limit is only per individual swap file, so it's always possible to exceed these limits with multiple swap files or partitions. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 15 +++++++++++++ include/linux/swapfile.h | 2 ++ mm/swapfile.c | 46 ++++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 071cbbbb60d9..a21cf622a65e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -4,6 +4,8 @@ #include #include #include /* for max_low_pfn */ +#include +#include #include #include @@ -880,3 +882,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) __cachemode2pte_tbl[cache] = __cm_idx2pte(entry); __pte2cachemode_tbl[entry] = cache; } + +unsigned long max_swapfile_size(void) +{ + unsigned long pages; + + pages = generic_max_swapfile_size(); + + if (boot_cpu_has_bug(X86_BUG_L1TF)) { + /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ + pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages); + } + return pages; +} diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 06bd7b096167..e06febf62978 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -10,5 +10,7 @@ extern spinlock_t swap_lock; extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern int try_to_unuse(unsigned int, bool, unsigned long); +extern unsigned long generic_max_swapfile_size(void); +extern unsigned long max_swapfile_size(void); #endif /* _LINUX_SWAPFILE_H */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 03d2ce288d83..8cbc7d6fd52e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2902,6 +2902,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) return 0; } + +/* + * Find out how many pages are allowed for a single swap device. There + * are two limiting factors: + * 1) the number of bits for the swap offset in the swp_entry_t type, and + * 2) the number of bits in the swap pte, as defined by the different + * architectures. + * + * In order to find the largest possible bit mask, a swap entry with + * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, + * decoded to a swp_entry_t again, and finally the swap offset is + * extracted. + * + * This will mask all the bits from the initial ~0UL mask that can't + * be encoded in either the swp_entry_t or the architecture definition + * of a swap pte. + */ +unsigned long generic_max_swapfile_size(void) +{ + return swp_offset(pte_to_swp_entry( + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; +} + +/* Can be overridden by an architecture for additional checks. */ +__weak unsigned long max_swapfile_size(void) +{ + return generic_max_swapfile_size(); +} + static unsigned long read_swap_header(struct swap_info_struct *p, union swap_header *swap_header, struct inode *inode) @@ -2937,22 +2966,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p, p->cluster_next = 1; p->cluster_nr = 0; - /* - * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number - * of bits for the swap offset in the swp_entry_t type, and - * 2) the number of bits in the swap pte as defined by the - * different architectures. In order to find the - * largest possible bit mask, a swap entry with swap type 0 - * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again, and finally the swap - * offset is extracted. This will mask all the bits from - * the initial ~0UL mask that can't be encoded in either - * the swp_entry_t or the architecture definition of a - * swap pte. - */ - maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + maxpages = max_swapfile_size(); last_page = swap_header->info.last_page; if (!last_page) { pr_warn("Empty swap-file\n"); -- GitLab From 202f9056085f9380ff82fff5218e343c3f244956 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 16:42:57 -0400 Subject: [PATCH 0684/1291] x86/bugs: Move the l1tf function and define pr_fmt properly commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt which was defined as "Spectre V2 : ". Move the function to be past SSBD and also define the pr_fmt. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9f991fc8d545..4dab6fb9db9b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -209,32 +209,6 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } -static void __init l1tf_select_mitigation(void) -{ - u64 half_pa; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) - return; - -#if CONFIG_PGTABLE_LEVELS == 2 - pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); - return; -#endif - - /* - * This is extremely unlikely to happen because almost all - * systems have far more MAX_PA/2 than RAM can be fit into - * DIMM slots. - */ - half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { - pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); - return; - } - - setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); -} - #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -660,6 +634,35 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +#undef pr_fmt +#define pr_fmt(fmt) "L1TF: " fmt +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} +#undef pr_fmt + #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -- GitLab From fc890e9b571fb273e714bae5de682226eaed9cb2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 May 2018 16:43:46 +0200 Subject: [PATCH 0685/1291] sched/smt: Update sched_smt_present at runtime commit ba2591a5993eabcc8e874e30f361d8ffbb10d6d4 upstream The static key sched_smt_present is only updated at boot time when SMT siblings have been detected. Booting with maxcpus=1 and bringing the siblings online after boot rebuilds the scheduling domains correctly but does not update the static key, so the SMT code is not enabled. Let the key be updated in the scheduler CPU hotplug code to fix this. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 30 ++++++++++++------------------ kernel/sched/fair.c | 1 + 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 31615d1ae44c..4e89ed8a0fb2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5615,6 +5615,18 @@ int sched_cpu_activate(unsigned int cpu) struct rq *rq = cpu_rq(cpu); struct rq_flags rf; +#ifdef CONFIG_SCHED_SMT + /* + * The sched_smt_present static key needs to be evaluated on every + * hotplug event because at boot time SMT might be disabled when + * the number of booted CPUs is limited. + * + * If then later a sibling gets hotplugged, then the key would stay + * off and SMT scheduling would never be functional. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) > 1) + static_branch_enable_cpuslocked(&sched_smt_present); +#endif set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -5710,22 +5722,6 @@ int sched_cpu_dying(unsigned int cpu) } #endif -#ifdef CONFIG_SCHED_SMT -DEFINE_STATIC_KEY_FALSE(sched_smt_present); - -static void sched_init_smt(void) -{ - /* - * We've enumerated all CPUs and will assume that if any CPU - * has SMT siblings, CPU0 will too. - */ - if (cpumask_weight(cpu_smt_mask(0)) > 1) - static_branch_enable(&sched_smt_present); -} -#else -static inline void sched_init_smt(void) { } -#endif - void __init sched_init_smp(void) { cpumask_var_t non_isolated_cpus; @@ -5755,8 +5751,6 @@ void __init sched_init_smp(void) init_sched_rt_class(); init_sched_dl_class(); - sched_init_smt(); - sched_smp_initialized = true; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5c09ddf8c832..0cc7098c6dfd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5631,6 +5631,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } #ifdef CONFIG_SCHED_SMT +DEFINE_STATIC_KEY_FALSE(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { -- GitLab From b84b9184651dd2a003459293ec7caa58e5423562 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 17:50:22 +0200 Subject: [PATCH 0686/1291] x86/smp: Provide topology_is_primary_thread() commit 6a4d2657e048f096c7ffcad254010bd94891c8c0 upstream If the CPU is supporting SMT then the primary thread can be found by checking the lower APIC ID bits for zero. smp_num_siblings is used to build the mask for the APIC ID bits which need to be taken into account. This uses the MPTABLE or ACPI/MADT supplied APIC ID, which can be different than the initial APIC ID in CPUID. But according to AMD the lower bits have to be consistent. Intel gave a tentative confirmation as well. Preparatory patch to support disabling SMT at boot/runtime. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 7 +++++++ arch/x86/include/asm/topology.h | 4 +++- arch/x86/kernel/apic/apic.c | 15 +++++++++++++++ arch/x86/kernel/smpboot.c | 9 +++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 5f01671c68f2..77ffd3b7894f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -613,6 +613,13 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif #endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_SMP +bool apic_id_is_primary_thread(unsigned int id); +#else +static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +#endif + extern void irq_enter(void); extern void irq_exit(void); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c1d2a9892352..661e361a12a6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -123,13 +123,15 @@ static inline int topology_max_smt_threads(void) } int topology_update_package_map(unsigned int apicid, unsigned int cpu); -extern int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_pkg(unsigned int pkg); +bool topology_is_primary_thread(unsigned int cpu); #else #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } +static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f48a51335538..0a10d3ed77ce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2092,6 +2092,21 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +/** + * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread + * @id: APIC ID to check + */ +bool apic_id_is_primary_thread(unsigned int apicid) +{ + u32 mask; + + if (smp_num_siblings == 1) + return true; + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + return !(apicid & mask); +} + /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 344d3c160f8d..e817e0904254 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -311,6 +311,15 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu) return 0; } +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +bool topology_is_primary_thread(unsigned int cpu) +{ + return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); +} + /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * -- GitLab From 8eb2860590ba188c8d9fbd5497ac4bfd9848201b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Jun 2018 10:37:20 +0200 Subject: [PATCH 0687/1291] x86/topology: Provide topology_smt_supported() commit f048c399e0f7490ab7296bc2c255d37eb14a9675 upstream Provide information whether SMT is supoorted by the CPUs. Preparatory patch for SMT control mechanism. Suggested-by: Dave Hansen Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/topology.h | 2 ++ arch/x86/kernel/smpboot.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 661e361a12a6..453cf38a1c33 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -125,6 +125,7 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); bool topology_is_primary_thread(unsigned int cpu); +bool topology_smt_supported(void); #else #define topology_max_packages() (1) static inline int @@ -132,6 +133,7 @@ topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline bool topology_smt_supported(void) { return false; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e817e0904254..60dc95d7d96d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -320,6 +320,14 @@ bool topology_is_primary_thread(unsigned int cpu) return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); } +/** + * topology_smt_supported - Check whether SMT is supported by the CPUs + */ +bool topology_smt_supported(void) +{ + return smp_num_siblings > 1; +} + /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * -- GitLab From 26a6dcc7134b7f45e0058cd735148afd2715f67a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 19:05:25 +0200 Subject: [PATCH 0688/1291] cpu/hotplug: Make bringup/teardown of smp threads symmetric commit c4de65696d865c225fda3b9913b31284ea65ea96 upstream The asymmetry caused a warning to trigger if the bootup was stopped in state CPUHP_AP_ONLINE_IDLE. The warning no longer triggers as kthread_park() can now be invoked on already or still parked threads. But there is still no reason to have this be asymmetric. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index d6affe6bd6b6..ac08cfbb9167 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -758,7 +758,6 @@ static int takedown_cpu(unsigned int cpu) /* Park the smpboot threads */ kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); - smpboot_park_threads(cpu); /* * Prevent irq alloc/free while the dying cpu reorganizes the @@ -1344,7 +1343,7 @@ static struct cpuhp_step cpuhp_ap_states[] = { [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", .startup.single = smpboot_unpark_threads, - .teardown.single = NULL, + .teardown.single = smpboot_park_threads, }, [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { .name = "irq/affinity:online", -- GitLab From 6beba29c66bcab61457173c7c36638fa3f5824b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 17:49:05 +0200 Subject: [PATCH 0689/1291] cpu/hotplug: Split do_cpu_down() commit cc1fe215e1efa406b03aa4389e6269b61342dec5 upstream Split out the inner workings of do_cpu_down() to allow reuse of that function for the upcoming SMT disabling mechanism. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index ac08cfbb9167..b419da4b3ba9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -910,20 +910,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, return ret; } +static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) +{ + if (cpu_hotplug_disabled) + return -EBUSY; + return _cpu_down(cpu, 0, target); +} + static int do_cpu_down(unsigned int cpu, enum cpuhp_state target) { int err; cpu_maps_update_begin(); - - if (cpu_hotplug_disabled) { - err = -EBUSY; - goto out; - } - - err = _cpu_down(cpu, 0, target); - -out: + err = cpu_down_maps_locked(cpu, target); cpu_maps_update_done(); return err; } -- GitLab From c5ac43ee8c77b1a38d5223bb8a688b2116f1f958 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 17:48:27 +0200 Subject: [PATCH 0690/1291] cpu/hotplug: Provide knobs to control SMT commit 05736e4ac13c08a4a9b1ef2de26dd31a32cbee57 upstream Provide a command line and a sysfs knob to control SMT. The command line options are: 'nosmt': Enumerate secondary threads, but do not online them 'nosmt=force': Ignore secondary threads completely during enumeration via MP table and ACPI/MADT. The sysfs control file has the following states (read/write): 'on': SMT is enabled. Secondary threads can be freely onlined 'off': SMT is disabled. Secondary threads, even if enumerated cannot be onlined 'forceoff': SMT is permanentely disabled. Writes to the control file are rejected. 'notsupported': SMT is not supported by the CPU The command line option 'nosmt' sets the sysfs control to 'off'. This can be changed to 'on' to reenable SMT during runtime. The command line option 'nosmt=force' sets the sysfs control to 'forceoff'. This cannot be changed during runtime. When SMT is 'on' and the control file is changed to 'off' then all online secondary threads are offlined and attempts to online a secondary thread later on are rejected. When SMT is 'off' and the control file is changed to 'on' then secondary threads can be onlined again. The 'off' -> 'on' transition does not automatically online the secondary threads. When the control file is set to 'forceoff', the behaviour is the same as setting it to 'off', but the operation is irreversible and later writes to the control file are rejected. When the control status is 'notsupported' then writes to the control file are rejected. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 20 +++ .../admin-guide/kernel-parameters.txt | 8 + arch/Kconfig | 3 + arch/x86/Kconfig | 1 + include/linux/cpu.h | 13 ++ kernel/cpu.c | 170 ++++++++++++++++++ 6 files changed, 215 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 8355e79350b7..c9c9e60d3e7c 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -390,3 +390,23 @@ Description: Information about CPU vulnerabilities "Not affected" CPU is not affected by the vulnerability "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect + +What: /sys/devices/system/cpu/smt + /sys/devices/system/cpu/smt/active + /sys/devices/system/cpu/smt/control +Date: June 2018 +Contact: Linux kernel mailing list +Description: Control Symetric Multi Threading (SMT) + + active: Tells whether SMT is active (enabled and siblings online) + + control: Read/write interface to control SMT. Possible + values: + + "on" SMT is enabled + "off" SMT is disabled + "forceoff" SMT is force disabled. Cannot be changed. + "notsupported" SMT is not supported by the CPU + + If control status is "forceoff" or "notsupported" writes + are rejected. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d6d7669e667f..38d75cc78a3d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2595,6 +2595,14 @@ nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + [KNL,x86] Disable symmetric multithreading (SMT). + nosmt=force: Force disable SMT, similar to disabling + it in the BIOS except that some of the + resource partitioning effects which are + caused by having SMT enabled in the BIOS + cannot be undone. Depending on the CPU + type this might have a performance impact. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may allow data leaks with this option, which is equivalent diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..4e01862f58e4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -13,6 +13,9 @@ config KEXEC_CORE config HAVE_IMA_KEXEC bool +config HOTPLUG_SMT + bool + config OPROFILE tristate "OProfile system profiling" depends on PROFILING diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7483cd514c32..1c63a4b5320d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -176,6 +176,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select PCI_LOCKLESS_CONFIG select PERF_EVENTS diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2ce5a5b36786..d7f08316dce6 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -178,4 +178,17 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +#else +# define cpu_smt_control (CPU_SMT_ENABLED) +#endif + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index b419da4b3ba9..55a93d360921 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -937,6 +937,29 @@ EXPORT_SYMBOL(cpu_down); #define takedown_cpu NULL #endif /*CONFIG_HOTPLUG_CPU*/ +#ifdef CONFIG_HOTPLUG_SMT +enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; + +static int __init smt_cmdline_disable(char *str) +{ + cpu_smt_control = CPU_SMT_DISABLED; + if (str && !strcmp(str, "force")) { + pr_info("SMT: Force disabled\n"); + cpu_smt_control = CPU_SMT_FORCE_DISABLED; + } + return 0; +} +early_param("nosmt", smt_cmdline_disable); + +static inline bool cpu_smt_allowed(unsigned int cpu) +{ + return cpu_smt_control == CPU_SMT_ENABLED || + topology_is_primary_thread(cpu); +} +#else +static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } +#endif + /** * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU * @cpu: cpu that just started @@ -1060,6 +1083,10 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target) err = -EBUSY; goto out; } + if (!cpu_smt_allowed(cpu)) { + err = -EPERM; + goto out; + } err = _cpu_up(cpu, 0, target); out: @@ -1916,10 +1943,153 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = { NULL }; +#ifdef CONFIG_HOTPLUG_SMT + +static const char *smt_states[] = { + [CPU_SMT_ENABLED] = "on", + [CPU_SMT_DISABLED] = "off", + [CPU_SMT_FORCE_DISABLED] = "forceoff", + [CPU_SMT_NOT_SUPPORTED] = "notsupported", +}; + +static ssize_t +show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]); +} + +static void cpuhp_offline_cpu_device(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); + + dev->offline = true; + /* Tell user space about the state change */ + kobject_uevent(&dev->kobj, KOBJ_OFFLINE); +} + +static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) +{ + int cpu, ret = 0; + + cpu_maps_update_begin(); + for_each_online_cpu(cpu) { + if (topology_is_primary_thread(cpu)) + continue; + ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); + if (ret) + break; + /* + * As this needs to hold the cpu maps lock it's impossible + * to call device_offline() because that ends up calling + * cpu_down() which takes cpu maps lock. cpu maps lock + * needs to be held as this might race against in kernel + * abusers of the hotplug machinery (thermal management). + * + * So nothing would update device:offline state. That would + * leave the sysfs entry stale and prevent onlining after + * smt control has been changed to 'off' again. This is + * called under the sysfs hotplug lock, so it is properly + * serialized against the regular offline usage. + */ + cpuhp_offline_cpu_device(cpu); + } + if (!ret) + cpu_smt_control = ctrlval; + cpu_maps_update_done(); + return ret; +} + +static void cpuhp_smt_enable(void) +{ + cpu_maps_update_begin(); + cpu_smt_control = CPU_SMT_ENABLED; + cpu_maps_update_done(); +} + +static ssize_t +store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int ctrlval, ret; + + if (sysfs_streq(buf, "on")) + ctrlval = CPU_SMT_ENABLED; + else if (sysfs_streq(buf, "off")) + ctrlval = CPU_SMT_DISABLED; + else if (sysfs_streq(buf, "forceoff")) + ctrlval = CPU_SMT_FORCE_DISABLED; + else + return -EINVAL; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) + return -EPERM; + + if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + return -ENODEV; + + ret = lock_device_hotplug_sysfs(); + if (ret) + return ret; + + if (ctrlval != cpu_smt_control) { + switch (ctrlval) { + case CPU_SMT_ENABLED: + cpuhp_smt_enable(); + break; + case CPU_SMT_DISABLED: + case CPU_SMT_FORCE_DISABLED: + ret = cpuhp_smt_disable(ctrlval); + break; + } + } + + unlock_device_hotplug(); + return ret ? ret : count; +} +static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control); + +static ssize_t +show_smt_active(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool active = topology_max_smt_threads() > 1; + + return snprintf(buf, PAGE_SIZE - 2, "%d\n", active); +} +static DEVICE_ATTR(active, 0444, show_smt_active, NULL); + +static struct attribute *cpuhp_smt_attrs[] = { + &dev_attr_control.attr, + &dev_attr_active.attr, + NULL +}; + +static const struct attribute_group cpuhp_smt_attr_group = { + .attrs = cpuhp_smt_attrs, + .name = "smt", + NULL +}; + +static int __init cpu_smt_state_init(void) +{ + if (!topology_smt_supported()) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; + + return sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpuhp_smt_attr_group); +} + +#else +static inline int cpu_smt_state_init(void) { return 0; } +#endif + static int __init cpuhp_sysfs_init(void) { int cpu, ret; + ret = cpu_smt_state_init(); + if (ret) + return ret; + ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_cpu_root_attr_group); if (ret) -- GitLab From 728ac48249f620182eb7ad065a0d7064b6f17e2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:36:15 +0200 Subject: [PATCH 0691/1291] x86/cpu: Remove the pointless CPU printout commit 55e6d279abd92cfd7576bba031e7589be8475edb upstream The value of this printout is dubious at best and there is no point in having it in two different places along with convoluted ways to reach it. Remove it completely. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 20 +++++--------------- arch/x86/kernel/cpu/topology.c | 11 ----------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b7cee3424bcd..835cd1c3400e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -619,13 +619,12 @@ void detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; int index_msb, core_bits; - static bool printed; if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; + return; if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) return; @@ -634,14 +633,14 @@ void detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; + if (!smp_num_siblings) + smp_num_siblings = 1; + if (smp_num_siblings == 1) { pr_info_once("CPU0: Hyper-Threading is disabled\n"); - goto out; + return; } - if (smp_num_siblings <= 1) - goto out; - index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); @@ -653,15 +652,6 @@ void detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); - -out: - if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - pr_info("CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - pr_info("CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } #endif } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b099024d339c..938276f98ec1 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -33,7 +33,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c) unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; - static bool printed; if (c->cpuid_level < 0xb) return; @@ -86,15 +85,5 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - if (!printed) { - pr_info("CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - pr_info("CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } - return; #endif } -- GitLab From 33c8be23981353f141937b37e0659eb5c1857c02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:47:10 +0200 Subject: [PATCH 0692/1291] x86/cpu/AMD: Remove the pointless detect_ht() call commit 44ca36de56d1bf196dca2eb67cd753a46961ffe6 upstream Real 32bit AMD CPUs do not have SMT and the only value of the call was to reach the magic printout which got removed. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 90574f731c05..c17da9cd7de1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -841,10 +841,6 @@ static void init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - init_amd_cacheinfo(c); if (c->x86 >= 0xf) -- GitLab From bfa4f8aeb0f38c68b3060b9c27ee533c89404ad1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:53:57 +0200 Subject: [PATCH 0693/1291] x86/cpu/common: Provide detect_ht_early() commit 545401f4448a807b963ff17b575e0a393e68b523 upstream To support force disabling of SMT it's required to know the number of thread siblings early. detect_ht() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 24 ++++++++++++++---------- arch/x86/kernel/cpu/cpu.h | 1 + 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 835cd1c3400e..c925498224d0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -614,32 +614,36 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -void detect_ht(struct cpuinfo_x86 *c) +int detect_ht_early(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; - int index_msb, core_bits; if (!cpu_has(c, X86_FEATURE_HT)) - return; + return -1; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; + return -1; if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; + return -1; cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; + if (smp_num_siblings == 1) + pr_info_once("CPU0: Hyper-Threading is disabled\n"); +#endif + return 0; +} - if (!smp_num_siblings) - smp_num_siblings = 1; +void detect_ht(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + int index_msb, core_bits; - if (smp_num_siblings == 1) { - pr_info_once("CPU0: Hyper-Threading is disabled\n"); + if (detect_ht_early(c) < 0) return; - } index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 37672d299e35..1f14d9d00228 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -47,6 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern int detect_ht_early(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); -- GitLab From 9552b7df0eb1a8846890c05d375fb3b89f9c2fe6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:55:39 +0200 Subject: [PATCH 0694/1291] x86/cpu/topology: Provide detect_extended_topology_early() commit 95f3d39ccf7aaea79d1ffdac1c887c2e100ec1b6 upstream To support force disabling of SMT it's required to know the number of thread siblings early. detect_extended_topology() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/cpu.h | 1 + arch/x86/kernel/cpu/topology.c | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1f14d9d00228..cca588407dca 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -47,6 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 938276f98ec1..19c6e800e816 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -27,15 +27,13 @@ * exists, use it for populating initial_apicid and cpu topology * detection. */ -void detect_extended_topology(struct cpuinfo_x86 *c) +int detect_extended_topology_early(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; + unsigned int eax, ebx, ecx, edx; if (c->cpuid_level < 0xb) - return; + return -1; cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); @@ -43,7 +41,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) * check if the cpuid leaf 0xb is actually implemented. */ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return; + return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); @@ -51,10 +49,30 @@ void detect_extended_topology(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; + smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); +#endif + return 0; +} + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void detect_extended_topology(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (detect_extended_topology_early(c) < 0) + return; /* * Populate HT related information from sub-leaf level 0. */ + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); -- GitLab From d20d8f7f6a92ce1211b27af925bdd6e2beebfab8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 01:00:55 +0200 Subject: [PATCH 0695/1291] x86/cpu/intel: Evaluate smp_num_siblings early commit 1910ad5624968f93be48e8e265513c54d66b897c upstream Make use of the new early detection function to initialize smp_num_siblings on the boot cpu before the MP-Table or ACPI/MADT scan happens. That's required for force disabling SMT. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0b2330e19169..278be092b300 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -301,6 +301,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + + /* + * Get the number of SMT siblings early from the extended topology + * leaf, if available. Otherwise try the legacy SMT detection. + */ + if (detect_extended_topology_early(c) < 0) + detect_ht_early(c); } #ifdef CONFIG_X86_32 -- GitLab From 35c67d5baad3e2ab745393cbfeb6c6cc9ac6a334 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 15 Jun 2018 20:48:39 +0200 Subject: [PATCH 0696/1291] x86/CPU/AMD: Do not check CPUID max ext level before parsing SMP info commit 119bff8a9c9bb00116a844ec68be7bc4b1c768f5 upstream Old code used to check whether CPUID ext max level is >= 0x80000008 because that last leaf contains the number of cores of the physical CPU. The three functions called there now do not depend on that leaf anymore so the check can go. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c17da9cd7de1..f9b3011a3575 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -835,11 +835,8 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } + amd_detect_cmp(c); + srat_detect_node(c); init_amd_cacheinfo(c); -- GitLab From 63f2c9b0d42f7afcdd698025885dd48bc73a8a5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:57:38 +0200 Subject: [PATCH 0697/1291] x86/cpu/AMD: Evaluate smp_num_siblings early commit 1e1d7e25fd759eddf96d8ab39d0a90a1979b2d8c upstream To support force disabling of SMT it's required to know the number of thread siblings early. amd_get_topology() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings and invoke it from amd_early_init(). Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f9b3011a3575..bac32d178fd2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -315,6 +315,17 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) c->cpu_core_id %= cus_per_node; } + +static void amd_get_topology_early(struct cpuinfo_x86 *c) +{ + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 eax, ebx, ecx, edx; + + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + } +} + /* * Fixup core topology information for * (1) AMD multi-node processors @@ -668,6 +679,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_SME); } } + + amd_get_topology_early(c); } static void init_amd_k8(struct cpuinfo_x86 *c) -- GitLab From f02f2ad9e711881bcb62f0fc64084c32af3600e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Jun 2018 14:00:11 +0200 Subject: [PATCH 0698/1291] x86/apic: Ignore secondary threads if nosmt=force MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2207def700f902f169fc237b717252c326f9e464 upstream nosmt on the kernel command line merely prevents the onlining of the secondary SMT siblings. nosmt=force makes the APIC detection code ignore the secondary SMT siblings completely, so they even do not show up as possible CPUs. That reduces the amount of memory allocations for per cpu variables and saves other resources from being allocated too large. This is not fully equivalent to disabling SMT in the BIOS because the low level SMT enabling in the BIOS can result in partitioning of resources between the siblings, which is not undone by just ignoring them. Some CPUs can use the full resources when their sibling is not onlined, but this is depending on the CPU family and model and it's not well documented whether this applies to all partitioned resources. That means depending on the workload disabling SMT in the BIOS might result in better performance. Linus analysis of the Intel manual: The intel optimization manual is not very clear on what the partitioning rules are. I find: "In general, the buffers for staging instructions between major pipe stages are partitioned. These buffers include µop queues after the execution trace cache, the queues after the register rename stage, the reorder buffer which stages instructions for retirement, and the load and store buffers. In the case of load and store buffers, partitioning also provided an easier implementation to maintain memory ordering for each logical processor and detect memory ordering violations" but some of that partitioning may be relaxed if the HT thread is "not active": "In Intel microarchitecture code name Sandy Bridge, the micro-op queue is statically partitioned to provide 28 entries for each logical processor, irrespective of software executing in single thread or multiple threads. If one logical processor is not active in Intel microarchitecture code name Ivy Bridge, then a single thread executing on that processor core can use the 56 entries in the micro-op queue" but I do not know what "not active" means, and how dynamic it is. Some of that partitioning may be entirely static and depend on the early BIOS disabling of HT, and even if we park the cores, the resources will just be wasted. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/acpi/boot.c | 3 ++- arch/x86/kernel/apic/apic.c | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 77ffd3b7894f..1430522b2b40 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -616,8 +616,10 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); +bool apic_id_disabled(unsigned int id); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +static inline bool apic_id_disabled(unsigned int id) { return false; } #endif extern void irq_enter(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6dda3595acf8..65afb2f1cd1a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -181,7 +181,8 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } if (!enabled) { - ++disabled_cpus; + if (!apic_id_disabled(id)) + ++disabled_cpus; return -EINVAL; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a10d3ed77ce..828ee43abd91 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2107,6 +2107,16 @@ bool apic_id_is_primary_thread(unsigned int apicid) return !(apicid & mask); } +/** + * apic_id_disabled - Check whether APIC ID is disabled via SMT control + * @id: APIC ID to check + */ +bool apic_id_disabled(unsigned int id) +{ + return (cpu_smt_control == CPU_SMT_FORCE_DISABLED && + !apic_id_is_primary_thread(id)); +} + /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. @@ -2202,6 +2212,15 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } + /* + * If SMT is force disabled and the APIC ID belongs to + * a secondary thread, ignore it. + */ + if (apic_id_disabled(apicid)) { + pr_info_once("Ignoring secondary SMT threads\n"); + return -EINVAL; + } + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed -- GitLab From c6374a6259fd10e5fe450bd10cf75a8e04d1b309 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Jun 2018 12:36:29 +0200 Subject: [PATCH 0699/1291] x86/speculation/l1tf: Extend 64bit swap file size limit commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream The previous patch has limited swap file size so that large offsets cannot clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation. It assumed that offsets are encoded starting with bit 12, same as pfn. But on x86_64, offsets are encoded starting with bit 9. Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA and 256TB with 46bit MAX_PA. Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a21cf622a65e..abf54d040096 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -891,7 +891,15 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages); + unsigned long l1tf_limit = l1tf_pfn_limit() + 1; + /* + * We encode swap offsets also with 3 bits below those for pfn + * which makes the usable limit higher. + */ +#ifdef CONFIG_X86_64 + l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; +#endif + pages = min_t(unsigned long, l1tf_limit, pages); } return pages; } -- GitLab From 62f43866636d08efca34b87cd2bf8b0f49edb27c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 16:42:58 -0400 Subject: [PATCH 0700/1291] x86/cpufeatures: Add detection of L1D cache flush support. commit 11e34e64e4103955fc4568750914c75d65ea87ee upstream 336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR (IA32_FLUSH_CMD) which is detected by CPUID.7.EDX[28]=1 bit being set. This new MSR "gives software a way to invalidate structures with finer granularity than other architectual methods like WBINVD." A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 30c23f539418..8418462298e7 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -339,6 +339,7 @@ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ -- GitLab From 2a82e5e51fc0153024c52d64ae00dcf006311c80 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:34:11 +0200 Subject: [PATCH 0701/1291] x86/CPU/AMD: Move TOPOEXT reenablement before reading smp_num_siblings commit 7ce2f0393ea2396142b7faf6ee9b1f3676d08a5f upstream The TOPOEXT reenablement is a workaround for broken BIOSen which didn't enable the CPUID bit. amd_get_topology_early(), however, relies on that bit being set so that it can read out the CPUID leaf and set smp_num_siblings properly. Move the reenablement up to early_init_amd(). While at it, simplify amd_get_topology_early(). Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bac32d178fd2..bb013735510a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -318,12 +318,8 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) static void amd_get_topology_early(struct cpuinfo_x86 *c) { - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; - } + if (cpu_has(c, X86_FEATURE_TOPOEXT)) + smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; } /* @@ -344,7 +340,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); node_id = ecx & 0xff; - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; @@ -590,6 +585,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) static void early_init_amd(struct cpuinfo_x86 *c) { + u64 value; u32 dummy; early_init_amd_mc(c); @@ -680,6 +676,20 @@ static void early_init_amd(struct cpuinfo_x86 *c) } } + /* Re-enable TopologyExtensions if switched off by BIOS */ + if (c->x86 == 0x15 && + (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + + if (msr_set_bit(0xc0011005, 54) > 0) { + rdmsrl(0xc0011005, value); + if (value & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } + } + } + amd_get_topology_early(c); } @@ -772,19 +782,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; - /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - /* * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. -- GitLab From b9cdf143eb9c24536750823097635da6bb7e362f Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 22 Jun 2018 17:39:33 +0200 Subject: [PATCH 0702/1291] x86/speculation/l1tf: Protect PAE swap entries against L1TF commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for offset. The lower word is zeroed, thus systems with less than 4GB memory are safe. With 4GB to 128GB the swap type selects the memory locations vulnerable to L1TF; with even more memory, also the swap offfset influences the address. This might be a problem with 32bit PAE guests running on large 64bit hosts. By continuing to keep the whole swap entry in either high or low 32bit word of PTE we would limit the swap size too much. Thus this patch uses the whole PAE PTE with the same layout as the 64bit version does. The macros just become a bit tricky since they assume the arch-dependent swp_entry_t to be 32bit. Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Acked-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++-- arch/x86/mm/init.c | 2 +- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 1ef2be4e9b69..9dc19b4a2a87 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -206,12 +206,43 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #endif /* Encode and de-code a swap entry */ +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) + #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) -#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) -#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) + +/* + * Normally, __swp_entry() converts from arch-independent swp_entry_t to + * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result + * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the + * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to + * __swp_entry_to_pte() through the following helper macro based on 64bit + * __swp_entry(). + */ +#define __swp_pteval_entry(type, offset) ((pteval_t) { \ + (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) }) + +#define __swp_entry_to_pte(x) ((pte_t){ .pte = \ + __swp_pteval_entry(__swp_type(x), __swp_offset(x)) }) +/* + * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent + * swp_entry_t, but also has to convert it from 64bit to the 32bit + * intermediate representation, using the following macros based on 64bit + * __swp_type() and __swp_offset(). + */ +#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS))) +#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)) + +#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ + __pteval_swp_offset(pte))) #define gup_get_pte gup_get_pte /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index abf54d040096..1d55a3dfd741 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -896,7 +896,7 @@ unsigned long max_swapfile_size(void) * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. */ -#ifdef CONFIG_X86_64 +#if CONFIG_PGTABLE_LEVELS > 2 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; #endif pages = min_t(unsigned long, l1tf_limit, pages); -- GitLab From 49221fc7e775656e87978aa56519fab12bb2560b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 27 Jun 2018 17:46:50 +0200 Subject: [PATCH 0703/1291] x86/speculation/l1tf: Fix up pte->pfn conversion for PAE commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for CONFIG_PAE because phys_addr_t is wider than unsigned long and so the pte_val reps. shift left would get truncated. Fix this up by using proper types. Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation") Reported-by: Jan Beulich Signed-off-by: Michal Hocko Signed-off-by: Thomas Gleixner Acked-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a242560af32f..2064571dc9be 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -191,21 +191,21 @@ static inline u64 protnone_mask(u64 val); static inline unsigned long pte_pfn(pte_t pte) { - unsigned long pfn = pte_val(pte); + phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { - unsigned long pfn = pmd_val(pmd); + phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - unsigned long pfn = pud_val(pud); + phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } @@ -538,7 +538,7 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - phys_addr_t pfn = page_nr << PAGE_SHIFT; + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | massage_pgprot(pgprot)); @@ -546,7 +546,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - phys_addr_t pfn = page_nr << PAGE_SHIFT; + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | massage_pgprot(pgprot)); @@ -554,7 +554,7 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { - phys_addr_t pfn = page_nr << PAGE_SHIFT; + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PUD_PAGE_MASK; return __pud(pfn | massage_pgprot(pgprot)); -- GitLab From f3e68ab4e778e575ab9fba10894387d177af510a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Jun 2018 16:05:47 +0200 Subject: [PATCH 0704/1291] Revert "x86/apic: Ignore secondary threads if nosmt=force" commit 506a66f374891ff08e064a058c446b336c5ac760 upstream Dave Hansen reported, that it's outright dangerous to keep SMT siblings disabled completely so they are stuck in the BIOS and wait for SIPI. The reason is that Machine Check Exceptions are broadcasted to siblings and the soft disabled sibling has CR4.MCE = 0. If a MCE is delivered to a logical core with CR4.MCE = 0, it asserts IERR#, which shuts down or reboots the machine. The MCE chapter in the SDM contains the following blurb: Because the logical processors within a physical package are tightly coupled with respect to shared hardware resources, both logical processors are notified of machine check errors that occur within a given physical processor. If machine-check exceptions are enabled when a fatal error is reported, all the logical processors within a physical package are dispatched to the machine-check exception handler. If machine-check exceptions are disabled, the logical processors enter the shutdown state and assert the IERR# signal. When enabling machine-check exceptions, the MCE flag in control register CR4 should be set for each logical processor. Reverting the commit which ignores siblings at enumeration time solves only half of the problem. The core cpuhotplug logic needs to be adjusted as well. This thoughtful engineered mechanism also turns the boot process on all Intel HT enabled systems into a MCE lottery. MCE is enabled on the boot CPU before the secondary CPUs are brought up. Depending on the number of physical cores the window in which this situation can happen is smaller or larger. On a HSW-EX it's about 750ms: MCE is enabled on the boot CPU: [ 0.244017] mce: CPU supports 22 MCE banks The corresponding sibling #72 boots: [ 1.008005] .... node #0, CPUs: #72 That means if an MCE hits on physical core 0 (logical CPUs 0 and 72) between these two points the machine is going to shutdown. At least it's a known safe state. It's obvious that the early boot can be hit by an MCE as well and then runs into the same situation because MCEs are not yet enabled on the boot CPU. But after enabling them on the boot CPU, it does not make any sense to prevent the kernel from recovering. Adjust the nosmt kernel parameter documentation as well. Reverts: 2207def700f9 ("x86/apic: Ignore secondary threads if nosmt=force") Reported-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 8 ++------ arch/x86/include/asm/apic.h | 2 -- arch/x86/kernel/acpi/boot.c | 3 +-- arch/x86/kernel/apic/apic.c | 19 ------------------- 4 files changed, 3 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 38d75cc78a3d..9f3ffe23a7c9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2596,12 +2596,8 @@ Equivalent to smt=1. [KNL,x86] Disable symmetric multithreading (SMT). - nosmt=force: Force disable SMT, similar to disabling - it in the BIOS except that some of the - resource partitioning effects which are - caused by having SMT enabled in the BIOS - cannot be undone. Depending on the CPU - type this might have a performance impact. + nosmt=force: Force disable SMT, cannot be undone + via the sysfs control file. nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1430522b2b40..77ffd3b7894f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -616,10 +616,8 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); -bool apic_id_disabled(unsigned int id); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } -static inline bool apic_id_disabled(unsigned int id) { return false; } #endif extern void irq_enter(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 65afb2f1cd1a..6dda3595acf8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -181,8 +181,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } if (!enabled) { - if (!apic_id_disabled(id)) - ++disabled_cpus; + ++disabled_cpus; return -EINVAL; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 828ee43abd91..0a10d3ed77ce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2107,16 +2107,6 @@ bool apic_id_is_primary_thread(unsigned int apicid) return !(apicid & mask); } -/** - * apic_id_disabled - Check whether APIC ID is disabled via SMT control - * @id: APIC ID to check - */ -bool apic_id_disabled(unsigned int id) -{ - return (cpu_smt_control == CPU_SMT_FORCE_DISABLED && - !apic_id_is_primary_thread(id)); -} - /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. @@ -2212,15 +2202,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - /* - * If SMT is force disabled and the APIC ID belongs to - * a secondary thread, ignore it. - */ - if (apic_id_disabled(apicid)) { - pr_info_once("Ignoring secondary SMT threads\n"); - return -EINVAL; - } - if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed -- GitLab From 7f2229c92b9ea4d4b1b50671b9fe5afadbf0b4f4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Jun 2018 16:05:48 +0200 Subject: [PATCH 0705/1291] cpu/hotplug: Boot HT siblings at least once commit 0cc3cd21657be04cb0559fe8063f2130493f92cf upstream Due to the way Machine Check Exceptions work on X86 hyperthreads it's required to boot up _all_ logical cores at least once in order to set the CR4.MCE bit. So instead of ignoring the sibling threads right away, let them boot up once so they can configure themselves. After they came out of the initial boot stage check whether its a "secondary" sibling and cancel the operation which puts the CPU back into offline state. Reported-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 72 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 55a93d360921..9d9d0640097b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -60,6 +60,7 @@ struct cpuhp_cpu_state { bool rollback; bool single; bool bringup; + bool booted_once; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; @@ -346,6 +347,40 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_HOTPLUG_SMT +enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; + +static int __init smt_cmdline_disable(char *str) +{ + cpu_smt_control = CPU_SMT_DISABLED; + if (str && !strcmp(str, "force")) { + pr_info("SMT: Force disabled\n"); + cpu_smt_control = CPU_SMT_FORCE_DISABLED; + } + return 0; +} +early_param("nosmt", smt_cmdline_disable); + +static inline bool cpu_smt_allowed(unsigned int cpu) +{ + if (cpu_smt_control == CPU_SMT_ENABLED) + return true; + + if (topology_is_primary_thread(cpu)) + return true; + + /* + * On x86 it's required to boot all logical CPUs at least once so + * that the init code can get a chance to set CR4.MCE on each + * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any + * core will shutdown the machine. + */ + return !per_cpu(cpuhp_state, cpu).booted_once; +} +#else +static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } +#endif + static inline enum cpuhp_state cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) { @@ -426,6 +461,16 @@ static int bringup_wait_for_ap(unsigned int cpu) stop_machine_unpark(cpu); kthread_unpark(st->thread); + /* + * SMT soft disabling on X86 requires to bring the CPU out of the + * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The + * CPU marked itself as booted_once in cpu_notify_starting() so the + * cpu_smt_allowed() check will now return false if this is not the + * primary sibling. + */ + if (!cpu_smt_allowed(cpu)) + return -ECANCELED; + if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; @@ -937,29 +982,6 @@ EXPORT_SYMBOL(cpu_down); #define takedown_cpu NULL #endif /*CONFIG_HOTPLUG_CPU*/ -#ifdef CONFIG_HOTPLUG_SMT -enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; - -static int __init smt_cmdline_disable(char *str) -{ - cpu_smt_control = CPU_SMT_DISABLED; - if (str && !strcmp(str, "force")) { - pr_info("SMT: Force disabled\n"); - cpu_smt_control = CPU_SMT_FORCE_DISABLED; - } - return 0; -} -early_param("nosmt", smt_cmdline_disable); - -static inline bool cpu_smt_allowed(unsigned int cpu) -{ - return cpu_smt_control == CPU_SMT_ENABLED || - topology_is_primary_thread(cpu); -} -#else -static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } -#endif - /** * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU * @cpu: cpu that just started @@ -974,6 +996,7 @@ void notify_cpu_starting(unsigned int cpu) int ret; rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ + st->booted_once = true; while (st->state < target) { st->state++; ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); @@ -2192,5 +2215,6 @@ void __init boot_cpu_init(void) */ void __init boot_cpu_hotplug_init(void) { - per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; + this_cpu_write(cpuhp_state.booted_once, true); + this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } -- GitLab From c2fdbbb47ca834b9b19fbbf0a9b740146a508b84 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 11:29:53 -0400 Subject: [PATCH 0706/1291] x86/KVM: Warn user if KVM is loaded SMT and L1TF CPU bug being present commit 26acfb666a473d960f0fd971fe68f3e3ad16c70b upstream If the L1TF CPU bug is present we allow the KVM module to be loaded as the major of users that use Linux and KVM have trusted guests and do not want a broken setup. Cloud vendors are the ones that are uncomfortable with CVE 2018-3620 and as such they are the ones that should set nosmt to one. Setting 'nosmt' means that the system administrator also needs to disable SMT (Hyper-threading) in the BIOS, or via the 'nosmt' command line parameter, or via the /sys/devices/system/cpu/smt/control. See commit 05736e4ac13c ("cpu/hotplug: Provide knobs to control SMT"). Other mitigations are to use task affinity, cpu sets, interrupt binding, etc - anything to make sure that _only_ the same guests vCPUs are running on sibling threads. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 6 ++++++ arch/x86/kvm/vmx.c | 19 +++++++++++++++++++ kernel/cpu.c | 1 + 3 files changed, 26 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9f3ffe23a7c9..8554fe13ad6d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1867,6 +1867,12 @@ [KVM,ARM] Trap guest accesses to GICv3 common system registers + kvm-intel.nosmt=[KVM,Intel] If the L1TF CPU bug is present (CVE-2018-3620) + and the system has SMT (aka Hyper-Threading) enabled then + don't allow guests to be created. + + Default is 0 (allow guests to be created). + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8d000fde1414..89a00f7bd27f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -70,6 +70,9 @@ static const struct x86_cpu_id vmx_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); +static bool __read_mostly nosmt; +module_param(nosmt, bool, S_IRUGO); + static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); @@ -9835,6 +9838,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(err); } +#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n" + +static int vmx_vm_init(struct kvm *kvm) +{ + if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) { + if (nosmt) { + pr_err(L1TF_MSG); + return -EOPNOTSUPP; + } + pr_warn(L1TF_MSG); + } + return 0; +} + static void __init vmx_check_processor_compat(void *rtn) { struct vmcs_config vmcs_conf; @@ -12225,6 +12242,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_accelerated_tpr = report_flexpriority, .has_emulated_msr = vmx_has_emulated_msr, + .vm_init = vmx_vm_init, + .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_reset = vmx_vcpu_reset, diff --git a/kernel/cpu.c b/kernel/cpu.c index 9d9d0640097b..e5362830e7ae 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; +EXPORT_SYMBOL_GPL(cpu_smt_control); static int __init smt_cmdline_disable(char *str) { -- GitLab From 77c8220e0d01d8c1c85343fa9472fd957c4878a0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 2 Jul 2018 12:29:30 +0200 Subject: [PATCH 0707/1291] x86/KVM/VMX: Add module argument for L1TF mitigation commit a399477e52c17e148746d3ce9a483f681c2aa9a0 upstream Add a mitigation mode parameter "vmentry_l1d_flush" for CVE-2018-3620, aka L1 terminal fault. The valid arguments are: - "always" L1D cache flush on every VMENTER. - "cond" Conditional L1D cache flush, explained below - "never" Disable the L1D cache flush mitigation "cond" is trying to avoid L1D cache flushes on VMENTER if the code executed between VMEXIT and VMENTER is considered safe, i.e. is not bringing any interesting information into L1D which might exploited. [ tglx: Split out from a larger patch ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 12 ++++ arch/x86/kvm/vmx.c | 65 ++++++++++++++++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8554fe13ad6d..e928ccff7d23 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1894,6 +1894,18 @@ (virtualized real and unpaged mode) on capable Intel chips. Default is 1 (enabled) + kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault + CVE-2018-3620. + + Valid arguments: never, cond, always + + always: L1D cache flush on every VMENTER. + cond: Flush L1D on VMENTER only when the code between + VMEXIT and VMENTER can leak host memory. + never: Disables the mitigation + + Default is cond (do L1 cache flush in specific instances) + kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification feature (tagged TLBs) on capable Intel chips. Default is 1 (enabled) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 89a00f7bd27f..7625d227b593 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -194,6 +194,54 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; +static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); + +/* These MUST be in sync with vmentry_l1d_param order. */ +enum vmx_l1d_flush_state { + VMENTER_L1D_FLUSH_NEVER, + VMENTER_L1D_FLUSH_COND, + VMENTER_L1D_FLUSH_ALWAYS, +}; + +static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND; + +static const struct { + const char *option; + enum vmx_l1d_flush_state cmd; +} vmentry_l1d_param[] = { + {"never", VMENTER_L1D_FLUSH_NEVER}, + {"cond", VMENTER_L1D_FLUSH_COND}, + {"always", VMENTER_L1D_FLUSH_ALWAYS}, +}; + +static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) +{ + unsigned int i; + + if (!s) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { + if (!strcmp(s, vmentry_l1d_param[i].option)) { + vmentry_l1d_flush = vmentry_l1d_param[i].cmd; + return 0; + } + } + + return -EINVAL; +} + +static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) +{ + return sprintf(s, "%s\n", vmentry_l1d_param[vmentry_l1d_flush].option); +} + +static const struct kernel_param_ops vmentry_l1d_flush_ops = { + .set = vmentry_l1d_flush_set, + .get = vmentry_l1d_flush_get, +}; +module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, &vmentry_l1d_flush, S_IRUGO); + #define NR_AUTOLOAD_MSRS 8 struct vmcs { @@ -12360,10 +12408,23 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .setup_mce = vmx_setup_mce, }; +static void __init vmx_setup_l1d_flush(void) +{ + if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || + !boot_cpu_has_bug(X86_BUG_L1TF)) + return; + + static_branch_enable(&vmx_l1d_should_flush); +} + static int __init vmx_init(void) { - int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + int r; + + vmx_setup_l1d_flush(); + + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) return r; -- GitLab From a662a3d89f1618a33540ab345945bbcd812f9057 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 2 Jul 2018 12:47:38 +0200 Subject: [PATCH 0708/1291] x86/KVM/VMX: Add L1D flush algorithm commit a47dd5f06714c844b33f3b5f517b6f3e81ce57b5 upstream To mitigate the L1 Terminal Fault vulnerability it's required to flush L1D on VMENTER to prevent rogue guests from snooping host memory. CPUs will have a new control MSR via a microcode update to flush L1D with a single MSR write, but in the absence of microcode a fallback to a software based flush algorithm is required. Add a software flush loop which is based on code from Intel. [ tglx: Split out from combo patch ] [ bpetkov: Polish the asm code ] Signed-off-by: Paolo Bonzini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 70 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7625d227b593..c4c5849f0a15 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9038,6 +9038,46 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } } +/* + * Software based L1D cache flush which is used when microcode providing + * the cache control MSR is not loaded. + * + * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to + * flush it is required to read in 64 KiB because the replacement algorithm + * is not exactly LRU. This could be sized at runtime via topology + * information but as all relevant affected CPUs have 32KiB L1D cache size + * there is no point in doing so. + */ +#define L1D_CACHE_ORDER 4 +static void *vmx_l1d_flush_pages; + +static void __maybe_unused vmx_l1d_flush(void) +{ + int size = PAGE_SIZE << L1D_CACHE_ORDER; + + asm volatile( + /* First ensure the pages are in the TLB */ + "xorl %%eax, %%eax\n" + ".Lpopulate_tlb:\n\t" + "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t" + "addl $4096, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lpopulate_tlb\n\t" + "xorl %%eax, %%eax\n\t" + "cpuid\n\t" + /* Now fill the cache */ + "xorl %%eax, %%eax\n" + ".Lfill_cache:\n" + "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t" + "addl $64, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lfill_cache\n\t" + "lfence\n" + :: [empty_zp] "r" (vmx_l1d_flush_pages), + [size] "r" (size) + : "eax", "ebx", "ecx", "edx"); +} + static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -12408,25 +12448,45 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .setup_mce = vmx_setup_mce, }; -static void __init vmx_setup_l1d_flush(void) +static int __init vmx_setup_l1d_flush(void) { + struct page *page; + if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || !boot_cpu_has_bug(X86_BUG_L1TF)) - return; + return 0; + + page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); + if (!page) + return -ENOMEM; + vmx_l1d_flush_pages = page_address(page); static_branch_enable(&vmx_l1d_should_flush); + return 0; +} + +static void vmx_free_l1d_flush_pages(void) +{ + if (vmx_l1d_flush_pages) { + free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); + vmx_l1d_flush_pages = NULL; + } } static int __init vmx_init(void) { int r; - vmx_setup_l1d_flush(); + r = vmx_setup_l1d_flush(); + if (r) + return r; r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); - if (r) + if (r) { + vmx_free_l1d_flush_pages(); return r; + } #ifdef CONFIG_KEXEC_CORE rcu_assign_pointer(crash_vmclear_loaded_vmcss, @@ -12444,6 +12504,8 @@ static void __exit vmx_exit(void) #endif kvm_exit(); + + vmx_free_l1d_flush_pages(); } module_init(vmx_init) -- GitLab From e993d9c0376a143f0407ec025c44569cb0a6032c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 2 Jul 2018 13:03:48 +0200 Subject: [PATCH 0709/1291] x86/KVM/VMX: Add L1D MSR based flush commit 3fa045be4c720146b18a19cea7a767dc6ad5df94 upstream 336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR (IA32_FLUSH_CMD aka 0x10B) which has similar write-only semantics to other MSRs defined in the document. The semantics of this MSR is to allow "finer granularity invalidation of caching structures than existing mechanisms like WBINVD. It will writeback and invalidate the L1 data cache, including all cachelines brought in by preceding instructions, without invalidating all caches (eg. L2 or LLC). Some processors may also invalidate the first level level instruction cache on a L1D_FLUSH command. The L1 data and instruction caches may be shared across the logical processors of a core." Use it instead of the loop based L1 flush algorithm. A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 [ tglx: Avoid allocating pages when the MSR is available ] Signed-off-by: Paolo Bonzini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kvm/vmx.c | 15 +++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 504b21692d32..2cccec4ab1c9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -76,6 +76,12 @@ * control required. */ +#define MSR_IA32_FLUSH_CMD 0x0000010b +#define L1D_FLUSH (1 << 0) /* + * Writeback and invalidate the + * L1 data cache. + */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c4c5849f0a15..7faa6ffc4e42 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9055,6 +9055,11 @@ static void __maybe_unused vmx_l1d_flush(void) { int size = PAGE_SIZE << L1D_CACHE_ORDER; + if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { + wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); + return; + } + asm volatile( /* First ensure the pages are in the TLB */ "xorl %%eax, %%eax\n" @@ -12456,11 +12461,13 @@ static int __init vmx_setup_l1d_flush(void) !boot_cpu_has_bug(X86_BUG_L1TF)) return 0; - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); - if (!page) - return -ENOMEM; + if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { + page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); + if (!page) + return -ENOMEM; + vmx_l1d_flush_pages = page_address(page); + } - vmx_l1d_flush_pages = page_address(page); static_branch_enable(&vmx_l1d_should_flush); return 0; } -- GitLab From 84f5b2512f6f6902e732adb84e9646a4ee18c51c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 2 Jul 2018 13:07:14 +0200 Subject: [PATCH 0710/1291] x86/KVM/VMX: Add L1D flush logic commit c595ceee45707f00f64f61c54fb64ef0cc0b4e85 upstream Add the logic for flushing L1D on VMENTER. The flush depends on the static key being enabled and the new l1tf_flush_l1d flag being set. The flags is set: - Always, if the flush module parameter is 'always' - Conditionally at: - Entry to vcpu_run(), i.e. after executing user space - From the sched_in notifier, i.e. when switching to a vCPU thread. - From vmexit handlers which are considered unsafe, i.e. where sensitive data can be brought into L1D: - The emulator, which could be a good target for other speculative execution-based threats, - The MMU, which can bring host page tables in the L1 cache. - External interrupts - Nested operations that require the MMU (see above). That is vmptrld, vmptrst, vmclear,vmwrite,vmread. - When handling invept,invvpid [ tglx: Split out from combo patch and reduced to a single flag ] Signed-off-by: Paolo Bonzini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 4 ++++ arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/vmx.c | 22 +++++++++++++++++++++- arch/x86/kvm/x86.c | 8 ++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 174b9c41efce..a41d58822070 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -693,6 +693,9 @@ struct kvm_vcpu_arch { /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; + + /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ + bool l1tf_flush_l1d; }; struct kvm_lpage_info { @@ -862,6 +865,7 @@ struct kvm_vcpu_stat { u64 signal_exits; u64 irq_window_exits; u64 nmi_window_exits; + u64 l1d_flush; u64 halt_exits; u64 halt_successful_poll; u64 halt_attempted_poll; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2ef2f1fe875b..00e2ae033a0f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3825,6 +3825,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, { int r = 1; + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: trace_kvm_page_fault(fault_address, error_code); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7faa6ffc4e42..e7eedccc15f2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9051,9 +9051,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; -static void __maybe_unused vmx_l1d_flush(void) +static void vmx_l1d_flush(struct kvm_vcpu *vcpu) { int size = PAGE_SIZE << L1D_CACHE_ORDER; + bool always; + + /* + * If the mitigation mode is 'flush always', keep the flush bit + * set, otherwise clear it. It gets set again either from + * vcpu_run() or from one of the unsafe VMEXIT handlers. + */ + always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; + vcpu->arch.l1tf_flush_l1d = always; + + vcpu->stat.l1d_flush++; if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); @@ -9324,6 +9335,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); + vcpu->arch.l1tf_flush_l1d = true; } } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); @@ -9579,6 +9591,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->__launched = vmx->loaded_vmcs->launched; + if (static_branch_unlikely(&vmx_l1d_should_flush)) { + if (vcpu->arch.l1tf_flush_l1d) + vmx_l1d_flush(vcpu); + } + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -11312,6 +11329,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (ret) return ret; + /* Hide L1D cache contents from the nested guest. */ + vmx->vcpu.arch.l1tf_flush_l1d = true; + /* * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken * by event injection, halt vcpu. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f3fe25639b3..b23e8ef3cb71 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -181,6 +181,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "irq_injections", VCPU_STAT(irq_injections) }, { "nmi_injections", VCPU_STAT(nmi_injections) }, { "req_event", VCPU_STAT(req_event) }, + { "l1d_flush", VCPU_STAT(l1d_flush) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -4573,6 +4574,9 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + /* kvm_write_guest_virt_system can pull in tons of pages. */ + vcpu->arch.l1tf_flush_l1d = true; + return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } @@ -5701,6 +5705,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, bool writeback = true; bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; + vcpu->arch.l1tf_flush_l1d = true; + /* * Clear write_fault_to_shadow_pgtable here to ensure it is * never reused. @@ -7146,6 +7152,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + vcpu->arch.l1tf_flush_l1d = true; for (;;) { if (kvm_vcpu_running(vcpu)) { @@ -8153,6 +8160,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { + vcpu->arch.l1tf_flush_l1d = true; kvm_x86_ops->sched_in(vcpu, cpu); } -- GitLab From 10309cbf1e3802cec871cf940b8693ae8421f46e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 13:58:37 -0400 Subject: [PATCH 0711/1291] x86/KVM/VMX: Split the VMX MSR LOAD structures to have an host/guest numbers commit 33966dd6b2d2c352fae55412db2ea8cfff5df13a upstream There is no semantic change but this change allows an unbalanced amount of MSRs to be loaded on VMEXIT and VMENTER, i.e. the number of MSRs to save or restore on VMEXIT or VMENTER may be different. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 65 +++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7eedccc15f2..36779522acd5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -618,6 +618,11 @@ static inline int pi_test_sn(struct pi_desc *pi_desc) (unsigned long *)&pi_desc->control); } +struct vmx_msrs { + unsigned int nr; + struct vmx_msr_entry val[NR_AUTOLOAD_MSRS]; +}; + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -651,9 +656,8 @@ struct vcpu_vmx { struct loaded_vmcs *loaded_vmcs; bool __launched; /* temporary, used in vmx_vcpu_run */ struct msr_autoload { - unsigned nr; - struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; - struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; + struct vmx_msrs guest; + struct vmx_msrs host; } msr_autoload; struct { int loaded; @@ -2041,18 +2045,18 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) } break; } - - for (i = 0; i < m->nr; ++i) - if (m->guest[i].index == msr) + for (i = 0; i < m->guest.nr; ++i) + if (m->guest.val[i].index == msr) break; - if (i == m->nr) + if (i == m->guest.nr) return; - --m->nr; - m->guest[i] = m->guest[m->nr]; - m->host[i] = m->host[m->nr]; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); + --m->guest.nr; + --m->host.nr; + m->guest.val[i] = m->guest.val[m->guest.nr]; + m->host.val[i] = m->host.val[m->host.nr]; + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, @@ -2104,24 +2108,25 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } - for (i = 0; i < m->nr; ++i) - if (m->guest[i].index == msr) + for (i = 0; i < m->guest.nr; ++i) + if (m->guest.val[i].index == msr) break; if (i == NR_AUTOLOAD_MSRS) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; - } else if (i == m->nr) { - ++m->nr; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); + } else if (i == m->guest.nr) { + ++m->guest.nr; + ++m->host.nr; + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } - m->guest[i].index = msr; - m->guest[i].value = guest_val; - m->host[i].index = msr; - m->host[i].value = host_val; + m->guest.val[i].index = msr; + m->guest.val[i].value = guest_val; + m->host.val[i].index = msr; + m->host.val[i].value = host_val; } static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) @@ -5765,9 +5770,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); + vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); @@ -10901,10 +10906,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Set the MSR load/store lists to match L0's settings. */ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); + vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before @@ -11842,8 +11847,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx_segment_cache_clear(vmx); /* Update any VMCS fields that might have changed while L2 ran */ - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (vmx->hv_deadline_tsc == -1) vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, -- GitLab From 916f4623d213b9801ade314b1bf678a113d67af3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 20:11:39 -0400 Subject: [PATCH 0712/1291] x86/KVM/VMX: Add find_msr() helper function commit ca83b4a7f2d068da79a029d323024aa45decb250 upstream .. to help find the MSR on either the guest or host MSR list. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 36779522acd5..44b0e0026c41 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2022,9 +2022,20 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, vm_exit_controls_clearbit(vmx, exit); } +static int find_msr(struct vmx_msrs *m, unsigned int msr) +{ + unsigned int i; + + for (i = 0; i < m->nr; ++i) { + if (m->val[i].index == msr) + return i; + } + return -ENOENT; +} + static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) { - unsigned i; + int i; struct msr_autoload *m = &vmx->msr_autoload; switch (msr) { @@ -2045,11 +2056,8 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) } break; } - for (i = 0; i < m->guest.nr; ++i) - if (m->guest.val[i].index == msr) - break; - - if (i == m->guest.nr) + i = find_msr(&m->guest, msr); + if (i < 0) return; --m->guest.nr; --m->host.nr; @@ -2073,7 +2081,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, u64 guest_val, u64 host_val) { - unsigned i; + int i; struct msr_autoload *m = &vmx->msr_autoload; switch (msr) { @@ -2108,16 +2116,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } - for (i = 0; i < m->guest.nr; ++i) - if (m->guest.val[i].index == msr) - break; - + i = find_msr(&m->guest, msr); if (i == NR_AUTOLOAD_MSRS) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; - } else if (i == m->guest.nr) { - ++m->guest.nr; + } else if (i < 0) { + i = m->guest.nr++; ++m->host.nr; vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); -- GitLab From 0299ca42458cbdee213c47c1c2ebad4432fbb358 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 22:00:47 -0400 Subject: [PATCH 0713/1291] x86/KVM/VMX: Separate the VMX AUTOLOAD guest/host number accounting commit 3190709335dd31fe1aeeebfe4ffb6c7624ef971f upstream This allows to load a different number of MSRs depending on the context: VMEXIT or VMENTER. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 44b0e0026c41..c2d740975a00 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2058,12 +2058,18 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) } i = find_msr(&m->guest, msr); if (i < 0) - return; + goto skip_guest; --m->guest.nr; - --m->host.nr; m->guest.val[i] = m->guest.val[m->guest.nr]; - m->host.val[i] = m->host.val[m->host.nr]; vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + +skip_guest: + i = find_msr(&m->host, msr); + if (i < 0) + return; + + --m->host.nr; + m->host.val[i] = m->host.val[m->host.nr]; vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } @@ -2081,7 +2087,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, u64 guest_val, u64 host_val) { - int i; + int i, j; struct msr_autoload *m = &vmx->msr_autoload; switch (msr) { @@ -2117,21 +2123,24 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, } i = find_msr(&m->guest, msr); - if (i == NR_AUTOLOAD_MSRS) { + j = find_msr(&m->host, msr); + if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; - } else if (i < 0) { + } + if (i < 0) { i = m->guest.nr++; - ++m->host.nr; vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + } + if (j < 0) { + j = m->host.nr++; vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } - m->guest.val[i].index = msr; m->guest.val[i].value = guest_val; - m->host.val[i].index = msr; - m->host.val[i].value = host_val; + m->host.val[j].index = msr; + m->host.val[j].value = host_val; } static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) -- GitLab From 7b0cdac526410afd2b5c3e0069366799087db790 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 22:01:22 -0400 Subject: [PATCH 0714/1291] x86/KVM/VMX: Extend add_atomic_switch_msr() to allow VMENTER only MSRs commit 989e3992d2eca32c3f1404f2bc91acda3aa122d8 upstream The IA32_FLUSH_CMD MSR needs only to be written on VMENTER. Extend add_atomic_switch_msr() with an entry_only parameter to allow storing the MSR only in the guest (ENTRY) MSR array. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c2d740975a00..f68ece3e2330 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2085,9 +2085,9 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, } static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, - u64 guest_val, u64 host_val) + u64 guest_val, u64 host_val, bool entry_only) { - int i, j; + int i, j = 0; struct msr_autoload *m = &vmx->msr_autoload; switch (msr) { @@ -2123,7 +2123,9 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, } i = find_msr(&m->guest, msr); - j = find_msr(&m->host, msr); + if (!entry_only) + j = find_msr(&m->host, msr); + if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); @@ -2133,12 +2135,16 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, i = m->guest.nr++; vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); } + m->guest.val[i].index = msr; + m->guest.val[i].value = guest_val; + + if (entry_only) + return; + if (j < 0) { j = m->host.nr++; vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } - m->guest.val[i].index = msr; - m->guest.val[i].value = guest_val; m->host.val[j].index = msr; m->host.val[j].value = host_val; } @@ -2184,7 +2190,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) guest_efer &= ~EFER_LME; if (guest_efer != host_efer) add_atomic_switch_msr(vmx, MSR_EFER, - guest_efer, host_efer); + guest_efer, host_efer, false); return false; } else { guest_efer &= ~ignore_bits; @@ -3593,7 +3599,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.ia32_xss = data; if (vcpu->arch.ia32_xss != host_xss) add_atomic_switch_msr(vmx, MSR_IA32_XSS, - vcpu->arch.ia32_xss, host_xss); + vcpu->arch.ia32_xss, host_xss, false); else clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; @@ -9517,7 +9523,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) clear_atomic_switch_msr(vmx, msrs[i].msr); else add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, - msrs[i].host); + msrs[i].host, false); } static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) -- GitLab From 1c67bf4ca05328323bac6cfb79c91c69228c2090 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 28 Jun 2018 17:10:36 -0400 Subject: [PATCH 0715/1291] x86/KVM/VMX: Use MSR save list for IA32_FLUSH_CMD if required commit 390d975e0c4e60ce70d4157e0dd91ede37824603 upstream If the L1D flush module parameter is set to 'always' and the IA32_FLUSH_CMD MSR is available, optimize the VMENTER code with the MSR save list. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f68ece3e2330..1ae158a6d306 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5714,6 +5714,16 @@ static void ept_set_mmio_spte_mask(void) VMX_EPT_MISCONFIG_WX_VALUE); } +static bool vmx_l1d_use_msr_save_list(void) +{ + if (!enable_ept || !boot_cpu_has_bug(X86_BUG_L1TF) || + static_cpu_has(X86_FEATURE_HYPERVISOR) || + !static_cpu_has(X86_FEATURE_FLUSH_L1D)) + return false; + + return vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; +} + #define VMX_XSS_EXIT_BITMAP 0 /* * Sets up the vmcs for emulated real mode. @@ -6061,6 +6071,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); } + /* + * If flushing the L1D cache on every VMENTER is enforced and the + * MSR is available, use the MSR save list. + */ + if (vmx_l1d_use_msr_save_list()) + add_atomic_switch_msr(vmx, MSR_IA32_FLUSH_CMD, L1D_FLUSH, 0, true); } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) @@ -9082,11 +9098,26 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) bool always; /* - * If the mitigation mode is 'flush always', keep the flush bit - * set, otherwise clear it. It gets set again either from - * vcpu_run() or from one of the unsafe VMEXIT handlers. + * This code is only executed when: + * - the flush mode is 'cond' + * - the flush mode is 'always' and the flush MSR is not + * available + * + * If the CPU has the flush MSR then clear the flush bit because + * 'always' mode is handled via the MSR save list. + * + * If the MSR is not avaibable then act depending on the mitigation + * mode: If 'flush always', keep the flush bit set, otherwise clear + * it. + * + * The flush bit gets set again either from vcpu_run() or from one + * of the unsafe VMEXIT handlers. */ - always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; + if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) + always = false; + else + always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; + vcpu->arch.l1tf_flush_l1d = always; vcpu->stat.l1d_flush++; @@ -12503,7 +12534,8 @@ static int __init vmx_setup_l1d_flush(void) struct page *page; if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || - !boot_cpu_has_bug(X86_BUG_L1TF)) + !boot_cpu_has_bug(X86_BUG_L1TF) || + vmx_l1d_use_msr_save_list()) return 0; if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { -- GitLab From ac10995a749844c1e98c2a5ba650eddf26d0eba6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 7 Jul 2018 11:40:18 +0200 Subject: [PATCH 0716/1291] cpu/hotplug: Online siblings when SMT control is turned on commit 215af5499d9e2b55f111d2431ea20218115f29b3 upstream Writing 'off' to /sys/devices/system/cpu/smt/control offlines all SMT siblings. Writing 'on' merily enables the abilify to online them, but does not online them automatically. Make 'on' more useful by onlining all offline siblings. Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index e5362830e7ae..7e4c788163cd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1991,6 +1991,15 @@ static void cpuhp_offline_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } +static void cpuhp_online_cpu_device(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); + + dev->offline = false; + /* Tell user space about the state change */ + kobject_uevent(&dev->kobj, KOBJ_ONLINE); +} + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2023,11 +2032,24 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } -static void cpuhp_smt_enable(void) +static int cpuhp_smt_enable(void) { + int cpu, ret = 0; + cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + for_each_present_cpu(cpu) { + /* Skip online CPUs and CPUs on offline nodes */ + if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) + continue; + ret = _cpu_up(cpu, 0, CPUHP_ONLINE); + if (ret) + break; + /* See comment in cpuhp_smt_disable() */ + cpuhp_online_cpu_device(cpu); + } cpu_maps_update_done(); + return ret; } static ssize_t @@ -2058,7 +2080,7 @@ store_smt_control(struct device *dev, struct device_attribute *attr, if (ctrlval != cpu_smt_control) { switch (ctrlval) { case CPU_SMT_ENABLED: - cpuhp_smt_enable(); + ret = cpuhp_smt_enable(); break; case CPU_SMT_DISABLED: case CPU_SMT_FORCE_DISABLED: -- GitLab From e7a979648a66fcd8968848069c0b48219af156b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:16 +0200 Subject: [PATCH 0717/1291] x86/litf: Introduce vmx status variable commit 72c6d2db64fa18c996ece8f06e499509e6c9a37e upstream Store the effective mitigation of VMX in a status variable and use it to report the VMX state in the l1tf sysfs file. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.433098358@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vmx.h | 9 +++++++++ arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.c | 22 +++++++++++----------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 7c300299e12e..6b9e556acef7 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -571,4 +571,13 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +enum vmx_l1d_flush_state { + VMENTER_L1D_FLUSH_AUTO, + VMENTER_L1D_FLUSH_NEVER, + VMENTER_L1D_FLUSH_COND, + VMENTER_L1D_FLUSH_ALWAYS, +}; + +extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; + #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4dab6fb9db9b..511e6d63d9dd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -636,6 +637,12 @@ void x86_spec_ctrl_setup_ap(void) #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt + +#if IS_ENABLED(CONFIG_KVM_INTEL) +enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO; +EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +#endif + static void __init l1tf_select_mitigation(void) { u64 half_pa; @@ -665,6 +672,32 @@ static void __init l1tf_select_mitigation(void) #ifdef CONFIG_SYSFS +#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" + +#if IS_ENABLED(CONFIG_KVM_INTEL) +static const char *l1tf_vmx_states[] = { + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", + [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", +}; + +static ssize_t l1tf_show_state(char *buf) +{ + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + + return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG, + cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled", + l1tf_vmx_states[l1tf_vmx_mitigation]); +} +#else +static ssize_t l1tf_show_state(char *buf) +{ + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); +} +#endif + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -692,9 +725,8 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) - return sprintf(buf, "Mitigation: Page Table Inversion\n"); + return l1tf_show_state(buf); break; - default: break; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1ae158a6d306..860f73746d83 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -196,19 +196,13 @@ extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -/* These MUST be in sync with vmentry_l1d_param order. */ -enum vmx_l1d_flush_state { - VMENTER_L1D_FLUSH_NEVER, - VMENTER_L1D_FLUSH_COND, - VMENTER_L1D_FLUSH_ALWAYS, -}; - static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND; static const struct { const char *option; enum vmx_l1d_flush_state cmd; } vmentry_l1d_param[] = { + {"auto", VMENTER_L1D_FLUSH_AUTO}, {"never", VMENTER_L1D_FLUSH_NEVER}, {"cond", VMENTER_L1D_FLUSH_COND}, {"always", VMENTER_L1D_FLUSH_ALWAYS}, @@ -12533,8 +12527,12 @@ static int __init vmx_setup_l1d_flush(void) { struct page *page; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return 0; + + l1tf_vmx_mitigation = vmentry_l1d_flush; + if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || - !boot_cpu_has_bug(X86_BUG_L1TF) || vmx_l1d_use_msr_save_list()) return 0; @@ -12549,12 +12547,14 @@ static int __init vmx_setup_l1d_flush(void) return 0; } -static void vmx_free_l1d_flush_pages(void) +static void vmx_cleanup_l1d_flush(void) { if (vmx_l1d_flush_pages) { free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); vmx_l1d_flush_pages = NULL; } + /* Restore state so sysfs ignores VMX */ + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; } static int __init vmx_init(void) @@ -12568,7 +12568,7 @@ static int __init vmx_init(void) r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) { - vmx_free_l1d_flush_pages(); + vmx_cleanup_l1d_flush(); return r; } @@ -12589,7 +12589,7 @@ static void __exit vmx_exit(void) kvm_exit(); - vmx_free_l1d_flush_pages(); + vmx_cleanup_l1d_flush(); } module_init(vmx_init) -- GitLab From de6749ddf3db5b9da14721973390159addc0ec1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:17 +0200 Subject: [PATCH 0718/1291] x86/kvm: Drop L1TF MSR list approach commit 2f055947ae5e2741fb2dc5bba1033c417ccf4faa upstream The VMX module parameter to control the L1D flush should become writeable. The MSR list is set up at VM init per guest VCPU, but the run time switching is based on a static key which is global. Toggling the MSR list at run time might be feasible, but for now drop this optimization and use the regular MSR write to make run-time switching possible. The default mitigation is the conditional flush anyway, so for extra paranoid setups this will add some small overhead, but the extra code executed is in the noise compared to the flush itself. Aside of that the EPT disabled case is not handled correctly at the moment and the MSR list magic is in the way for fixing that as well. If it's really providing a significant advantage, then this needs to be revisited after the code is correct and the control is writable. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.516940445@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 43 +++++++------------------------------------ 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 860f73746d83..ba4ae2bc854f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5708,16 +5708,6 @@ static void ept_set_mmio_spte_mask(void) VMX_EPT_MISCONFIG_WX_VALUE); } -static bool vmx_l1d_use_msr_save_list(void) -{ - if (!enable_ept || !boot_cpu_has_bug(X86_BUG_L1TF) || - static_cpu_has(X86_FEATURE_HYPERVISOR) || - !static_cpu_has(X86_FEATURE_FLUSH_L1D)) - return false; - - return vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; -} - #define VMX_XSS_EXIT_BITMAP 0 /* * Sets up the vmcs for emulated real mode. @@ -6065,12 +6055,6 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); } - /* - * If flushing the L1D cache on every VMENTER is enforced and the - * MSR is available, use the MSR save list. - */ - if (vmx_l1d_use_msr_save_list()) - add_atomic_switch_msr(vmx, MSR_IA32_FLUSH_CMD, L1D_FLUSH, 0, true); } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) @@ -9092,26 +9076,14 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) bool always; /* - * This code is only executed when: - * - the flush mode is 'cond' - * - the flush mode is 'always' and the flush MSR is not - * available - * - * If the CPU has the flush MSR then clear the flush bit because - * 'always' mode is handled via the MSR save list. - * - * If the MSR is not avaibable then act depending on the mitigation - * mode: If 'flush always', keep the flush bit set, otherwise clear - * it. + * This code is only executed when the the flush mode is 'cond' or + * 'always' * - * The flush bit gets set again either from vcpu_run() or from one - * of the unsafe VMEXIT handlers. + * If 'flush always', keep the flush bit set, otherwise clear + * it. The flush bit gets set again either from vcpu_run() or from + * one of the unsafe VMEXIT handlers. */ - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) - always = false; - else - always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; - + always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; vcpu->arch.l1tf_flush_l1d = always; vcpu->stat.l1d_flush++; @@ -12532,8 +12504,7 @@ static int __init vmx_setup_l1d_flush(void) l1tf_vmx_mitigation = vmentry_l1d_flush; - if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || - vmx_l1d_use_msr_save_list()) + if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER) return 0; if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { -- GitLab From de88416d6141b745c4e22e1a9345d142642e036d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:18 +0200 Subject: [PATCH 0719/1291] x86/l1tf: Handle EPT disabled state proper commit a7b9020b06ec6d7c3f3b0d4ef1a9eba12654f4f7 upstream If Extended Page Tables (EPT) are disabled or not supported, no L1D flushing is required. The setup function can just avoid setting up the L1D flush for the EPT=n case. Invoke it after the hardware setup has be done and enable_ept has the correct state and expose the EPT disabled state in the mitigation status as well. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.612160168@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kernel/cpu/bugs.c | 9 ++++---- arch/x86/kvm/vmx.c | 44 ++++++++++++++++++++++---------------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 6b9e556acef7..7b0492817169 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -576,6 +576,7 @@ enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_NEVER, VMENTER_L1D_FLUSH_COND, VMENTER_L1D_FLUSH_ALWAYS, + VMENTER_L1D_FLUSH_EPT_DISABLED, }; extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 511e6d63d9dd..b75828cf7715 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -676,10 +676,11 @@ static void __init l1tf_select_mitigation(void) #if IS_ENABLED(CONFIG_KVM_INTEL) static const char *l1tf_vmx_states[] = { - [VMENTER_L1D_FLUSH_AUTO] = "auto", - [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", - [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", - [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", + [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", + [VMENTER_L1D_FLUSH_EPT_DISABLED] = "EPT disabled", }; static ssize_t l1tf_show_state(char *buf) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ba4ae2bc854f..8ddc6b794a94 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12502,6 +12502,11 @@ static int __init vmx_setup_l1d_flush(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return 0; + if (!enable_ept) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; + return 0; + } + l1tf_vmx_mitigation = vmentry_l1d_flush; if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER) @@ -12528,18 +12533,35 @@ static void vmx_cleanup_l1d_flush(void) l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; } + +static void vmx_exit(void) +{ +#ifdef CONFIG_KEXEC_CORE + RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); + synchronize_rcu(); +#endif + + kvm_exit(); + + vmx_cleanup_l1d_flush(); +} +module_exit(vmx_exit) + static int __init vmx_init(void) { int r; - r = vmx_setup_l1d_flush(); + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) return r; - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + /* + * Must be called after kvm_init() so enable_ept is properly set up + */ + r = vmx_setup_l1d_flush(); if (r) { - vmx_cleanup_l1d_flush(); + vmx_exit(); return r; } @@ -12550,18 +12572,4 @@ static int __init vmx_init(void) return 0; } - -static void __exit vmx_exit(void) -{ -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif - - kvm_exit(); - - vmx_cleanup_l1d_flush(); -} - module_init(vmx_init) -module_exit(vmx_exit) -- GitLab From d0e1ca1045849f6aa71491b7ca9566cbdd94a32c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:19 +0200 Subject: [PATCH 0720/1291] x86/kvm: Move l1tf setup function commit 7db92e165ac814487264632ab2624e832f20ae38 upstream In preparation of allowing run time control for L1D flushing, move the setup code to the module parameter handler. In case of pre module init parsing, just store the value and let vmx_init() do the actual setup after running kvm_init() so that enable_ept is having the correct state. During run-time invoke it directly from the parameter setter to prepare for run-time control. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.694063239@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 123 ++++++++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 46 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8ddc6b794a94..98fa2911bd33 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -196,7 +196,8 @@ extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND; +/* Storage for pre module init parameter parsing */ +static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; static const struct { const char *option; @@ -208,33 +209,85 @@ static const struct { {"always", VMENTER_L1D_FLUSH_ALWAYS}, }; -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) +#define L1D_CACHE_ORDER 4 +static void *vmx_l1d_flush_pages; + +static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { - unsigned int i; + struct page *page; - if (!s) - return -EINVAL; + /* If set to 'auto' select 'cond' */ + if (l1tf == VMENTER_L1D_FLUSH_AUTO) + l1tf = VMENTER_L1D_FLUSH_COND; - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (!strcmp(s, vmentry_l1d_param[i].option)) { - vmentry_l1d_flush = vmentry_l1d_param[i].cmd; - return 0; - } + if (!enable_ept) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; + return 0; + } + + if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && + !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { + page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); + if (!page) + return -ENOMEM; + vmx_l1d_flush_pages = page_address(page); } + l1tf_vmx_mitigation = l1tf; + + if (l1tf != VMENTER_L1D_FLUSH_NEVER) + static_branch_enable(&vmx_l1d_should_flush); + return 0; +} + +static int vmentry_l1d_flush_parse(const char *s) +{ + unsigned int i; + + if (s) { + for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { + if (!strcmp(s, vmentry_l1d_param[i].option)) + return vmentry_l1d_param[i].cmd; + } + } return -EINVAL; } +static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) +{ + int l1tf; + + if (!boot_cpu_has(X86_BUG_L1TF)) + return 0; + + l1tf = vmentry_l1d_flush_parse(s); + if (l1tf < 0) + return l1tf; + + /* + * Has vmx_init() run already? If not then this is the pre init + * parameter parsing. In that case just store the value and let + * vmx_init() do the proper setup after enable_ept has been + * established. + */ + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { + vmentry_l1d_flush_param = l1tf; + return 0; + } + + return vmx_setup_l1d_flush(l1tf); +} + static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) { - return sprintf(s, "%s\n", vmentry_l1d_param[vmentry_l1d_flush].option); + return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, }; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, &vmentry_l1d_flush, S_IRUGO); +module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, S_IRUGO); #define NR_AUTOLOAD_MSRS 8 @@ -9083,7 +9136,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) * it. The flush bit gets set again either from vcpu_run() or from * one of the unsafe VMEXIT handlers. */ - always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS; + always = l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_ALWAYS; vcpu->arch.l1tf_flush_l1d = always; vcpu->stat.l1d_flush++; @@ -12495,34 +12548,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .setup_mce = vmx_setup_mce, }; -static int __init vmx_setup_l1d_flush(void) -{ - struct page *page; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) - return 0; - - if (!enable_ept) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; - return 0; - } - - l1tf_vmx_mitigation = vmentry_l1d_flush; - - if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER) - return 0; - - if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); - if (!page) - return -ENOMEM; - vmx_l1d_flush_pages = page_address(page); - } - - static_branch_enable(&vmx_l1d_should_flush); - return 0; -} - static void vmx_cleanup_l1d_flush(void) { if (vmx_l1d_flush_pages) { @@ -12557,12 +12582,18 @@ static int __init vmx_init(void) return r; /* - * Must be called after kvm_init() so enable_ept is properly set up + * Must be called after kvm_init() so enable_ept is properly set + * up. Hand the parameter mitigation value in which was stored in + * the pre module init parser. If no parameter was given, it will + * contain 'auto' which will be turned into the default 'cond' + * mitigation mode. */ - r = vmx_setup_l1d_flush(); - if (r) { - vmx_exit(); - return r; + if (boot_cpu_has(X86_BUG_L1TF)) { + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; + } } #ifdef CONFIG_KEXEC_CORE -- GitLab From 8e523a1da19e9cfcb4e801bcba2fd1e2d6f5aace Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:20 +0200 Subject: [PATCH 0721/1291] x86/kvm: Add static key for flush always commit 4c6523ec59fe895ea352a650218a6be0653910b1 upstream Avoid the conditional in the L1D flush control path. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.790914912@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98fa2911bd33..ae1601fd8998 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -195,6 +195,7 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); +static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always); /* Storage for pre module init parameter parsing */ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; @@ -235,8 +236,12 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) l1tf_vmx_mitigation = l1tf; - if (l1tf != VMENTER_L1D_FLUSH_NEVER) - static_branch_enable(&vmx_l1d_should_flush); + if (l1tf == VMENTER_L1D_FLUSH_NEVER) + return 0; + + static_branch_enable(&vmx_l1d_should_flush); + if (l1tf == VMENTER_L1D_FLUSH_ALWAYS) + static_branch_enable(&vmx_l1d_flush_always); return 0; } @@ -9126,7 +9131,6 @@ static void *vmx_l1d_flush_pages; static void vmx_l1d_flush(struct kvm_vcpu *vcpu) { int size = PAGE_SIZE << L1D_CACHE_ORDER; - bool always; /* * This code is only executed when the the flush mode is 'cond' or @@ -9136,8 +9140,10 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) * it. The flush bit gets set again either from vcpu_run() or from * one of the unsafe VMEXIT handlers. */ - always = l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_ALWAYS; - vcpu->arch.l1tf_flush_l1d = always; + if (static_branch_unlikely(&vmx_l1d_flush_always)) + vcpu->arch.l1tf_flush_l1d = true; + else + vcpu->arch.l1tf_flush_l1d = false; vcpu->stat.l1d_flush++; -- GitLab From 15e55ee33d89b9cf1745daf18260ee351058571c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:21 +0200 Subject: [PATCH 0722/1291] x86/kvm: Serialize L1D flush parameter setter commit dd4bfa739a72508b75760b393d129ed7b431daab upstream Writes to the parameter files are not serialized at the sysfs core level, so local serialization is required. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.873642605@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae1601fd8998..c1999911f671 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -196,6 +196,7 @@ extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always); +static DEFINE_MUTEX(vmx_l1d_flush_mutex); /* Storage for pre module init parameter parsing */ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; @@ -260,7 +261,7 @@ static int vmentry_l1d_flush_parse(const char *s) static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) { - int l1tf; + int l1tf, ret; if (!boot_cpu_has(X86_BUG_L1TF)) return 0; @@ -280,7 +281,10 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) return 0; } - return vmx_setup_l1d_flush(l1tf); + mutex_lock(&vmx_l1d_flush_mutex); + ret = vmx_setup_l1d_flush(l1tf); + mutex_unlock(&vmx_l1d_flush_mutex); + return ret; } static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) -- GitLab From e25726b1d30247589f36101c7e1e0a9b378fe407 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:22 +0200 Subject: [PATCH 0723/1291] x86/kvm: Allow runtime control of L1D flush commit 895ae47f9918833c3a880fbccd41e0692b37e7d9 upstream All mitigation modes can be switched at run time with a static key now: - Use sysfs_streq() instead of strcmp() to handle the trailing new line from sysfs writes correctly. - Make the static key management handle multiple invocations properly. - Set the module parameter file to RW Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.954525119@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b75828cf7715..f1c44735a5a6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void) #define pr_fmt(fmt) "L1TF: " fmt #if IS_ENABLED(CONFIG_KVM_INTEL) -enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO; +enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c1999911f671..c158821bec30 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -237,12 +237,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) l1tf_vmx_mitigation = l1tf; - if (l1tf == VMENTER_L1D_FLUSH_NEVER) - return 0; + if (l1tf != VMENTER_L1D_FLUSH_NEVER) + static_branch_enable(&vmx_l1d_should_flush); + else + static_branch_disable(&vmx_l1d_should_flush); - static_branch_enable(&vmx_l1d_should_flush); if (l1tf == VMENTER_L1D_FLUSH_ALWAYS) static_branch_enable(&vmx_l1d_flush_always); + else + static_branch_disable(&vmx_l1d_flush_always); return 0; } @@ -252,7 +255,7 @@ static int vmentry_l1d_flush_parse(const char *s) if (s) { for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (!strcmp(s, vmentry_l1d_param[i].option)) + if (sysfs_streq(s, vmentry_l1d_param[i].option)) return vmentry_l1d_param[i].cmd; } } @@ -296,7 +299,7 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, }; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, S_IRUGO); +module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); #define NR_AUTOLOAD_MSRS 8 -- GitLab From 8e41ddda308f9ac29fb9dd85d6e6bde7d3a29910 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Jul 2018 16:23:23 +0200 Subject: [PATCH 0724/1291] cpu/hotplug: Expose SMT control init function commit 8e1b706b6e819bed215c0db16345568864660393 upstream The L1TF mitigation will gain a commend line parameter which allows to set a combination of hypervisor mitigation and SMT control. Expose cpu_smt_disable() so the command line parser can tweak SMT settings. [ tglx: Split out of larger patch and made it preserve an already existing force off state ] Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.039715135@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 2 ++ kernel/cpu.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d7f08316dce6..36c8a1aecae4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -187,8 +187,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; +extern void cpu_smt_disable(bool force); #else # define cpu_smt_control (CPU_SMT_ENABLED) +static inline void cpu_smt_disable(bool force) { } #endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 7e4c788163cd..e41d453e98a5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -351,13 +351,23 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable); enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); -static int __init smt_cmdline_disable(char *str) +void __init cpu_smt_disable(bool force) { - cpu_smt_control = CPU_SMT_DISABLED; - if (str && !strcmp(str, "force")) { + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + return; + + if (force) { pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; + } else { + cpu_smt_control = CPU_SMT_DISABLED; } +} + +static int __init smt_cmdline_disable(char *str) +{ + cpu_smt_disable(str && !strcmp(str, "force")); return 0; } early_param("nosmt", smt_cmdline_disable); -- GitLab From 476d29ab70e710b7610148f9c38896bb004f08c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:24 +0200 Subject: [PATCH 0725/1291] cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early commit fee0aede6f4739c87179eca76136f83210953b86 upstream The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support SMT) when the sysfs SMT control file is initialized. That was fine so far as this was only required to make the output of the control file correct and to prevent writes in that case. With the upcoming l1tf command line parameter, this needs to be set up before the L1TF mitigation selection and command line parsing happens. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 ++++++ include/linux/cpu.h | 2 ++ kernel/cpu.c | 13 ++++++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f1c44735a5a6..603a6e466852 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -58,6 +58,12 @@ void __init check_bugs(void) { identify_boot_cpu(); + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. + */ + cpu_smt_check_topology(); + if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); print_cpu_info(&boot_cpu_data); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 36c8a1aecae4..c7712e042aba 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -188,9 +188,11 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology(void) { } #endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index e41d453e98a5..877437989a28 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -365,6 +365,16 @@ void __init cpu_smt_disable(bool force) } } +/* + * The decision whether SMT is supported can only be done after the full + * CPU identification. Called from architecture code. + */ +void __init cpu_smt_check_topology(void) +{ + if (!topology_smt_supported()) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; +} + static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); @@ -2127,9 +2137,6 @@ static const struct attribute_group cpuhp_smt_attr_group = { static int __init cpu_smt_state_init(void) { - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; - return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } -- GitLab From fc083988b6aa43fe3210792011647c1c1124ab5e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Jul 2018 16:23:25 +0200 Subject: [PATCH 0726/1291] x86/bugs, kvm: Introduce boot-time control of L1TF mitigations commit d90a7a0ec83fb86622cd7dae23255d3c50a99ec8 upstream Introduce the 'l1tf=' kernel command line option to allow for boot-time switching of mitigation that is used on processors affected by L1TF. The possible values are: full Provides all available mitigations for the L1TF vulnerability. Disables SMT and enables all mitigations in the hypervisors. SMT control via /sys/devices/system/cpu/smt/control is still possible after boot. Hypervisors will issue a warning when the first VM is started in a potentially insecure configuration, i.e. SMT enabled or L1D flush disabled. full,force Same as 'full', but disables SMT control. Implies the 'nosmt=force' command line option. sysfs control of SMT and the hypervisor flush control is disabled. flush Leaves SMT enabled and enables the conditional hypervisor mitigation. Hypervisors will issue a warning when the first VM is started in a potentially insecure configuration, i.e. SMT enabled or L1D flush disabled. flush,nosmt Disables SMT and enables the conditional hypervisor mitigation. SMT control via /sys/devices/system/cpu/smt/control is still possible after boot. If SMT is reenabled or flushing disabled at runtime hypervisors will issue a warning. flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is started in a potentially insecure configuration. off Disables hypervisor mitigations and doesn't emit any warnings. Default is 'flush'. Let KVM adhere to these semantics, which means: - 'lt1f=full,force' : Performe L1D flushes. No runtime control possible. - 'l1tf=full' - 'l1tf-flush' - 'l1tf=flush,nosmt' : Perform L1D flushes and warn on VM start if SMT has been runtime enabled or L1D flushing has been run-time enabled - 'l1tf=flush,nowarn' : Perform L1D flushes and no warnings are emitted. - 'l1tf=off' : L1D flushes are not performed and no warnings are emitted. KVM can always override the L1D flushing behavior using its 'vmentry_l1d_flush' module parameter except when lt1f=full,force is set. This makes KVM's private 'nosmt' option redundant, and as it is a bit non-systematic anyway (this is something to control globally, not on hypervisor level), remove that option. Add the missing Documentation entry for the l1tf vulnerability sysfs file while at it. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.202758176@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 4 ++ .../admin-guide/kernel-parameters.txt | 68 +++++++++++++++++-- arch/x86/include/asm/processor.h | 12 ++++ arch/x86/kernel/cpu/bugs.c | 44 ++++++++++++ arch/x86/kvm/vmx.c | 56 +++++++++++---- 5 files changed, 165 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index c9c9e60d3e7c..6cae60929cb6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -379,6 +379,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/l1tf Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -391,6 +392,9 @@ Description: Information about CPU vulnerabilities "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect + Details about the l1tf file can be found in + Documentation/admin-guide/l1tf.rst + What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active /sys/devices/system/cpu/smt/control diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e928ccff7d23..9841bad6f271 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1867,12 +1867,6 @@ [KVM,ARM] Trap guest accesses to GICv3 common system registers - kvm-intel.nosmt=[KVM,Intel] If the L1TF CPU bug is present (CVE-2018-3620) - and the system has SMT (aka Hyper-Threading) enabled then - don't allow guests to be created. - - Default is 0 (allow guests to be created). - kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) @@ -1910,6 +1904,68 @@ feature (tagged TLBs) on capable Intel chips. Default is 1 (enabled) + l1tf= [X86] Control mitigation of the L1TF vulnerability on + affected CPUs + + The kernel PTE inversion protection is unconditionally + enabled and cannot be disabled. + + full + Provides all available mitigations for the + L1TF vulnerability. Disables SMT and + enables all mitigations in the + hypervisors, i.e. unconditional L1D flush. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + full,force + Same as 'full', but disables SMT and L1D + flush runtime control. Implies the + 'nosmt=force' command line option. + (i.e. sysfs control of SMT is disabled.) + + flush + Leaves SMT enabled and enables the default + hypervisor mitigation, i.e. conditional + L1D flush. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + flush,nosmt + + Disables SMT and enables the default + hypervisor mitigation. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + flush,nowarn + Same as 'flush', but hypervisors will not + warn when a VM is started in a potentially + insecure configuration. + + off + Disables hypervisor mitigations and doesn't + emit any warnings. + + Default is 'flush'. + + For details see: Documentation/admin-guide/l1tf.rst + l2cr= [PPC] l3cr= [PPC] diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 267a8da7c694..0e856c0628b3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -974,4 +974,16 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); void microcode_check(void); + +enum l1tf_mitigations { + L1TF_MITIGATION_OFF, + L1TF_MITIGATION_FLUSH_NOWARN, + L1TF_MITIGATION_FLUSH, + L1TF_MITIGATION_FLUSH_NOSMT, + L1TF_MITIGATION_FULL, + L1TF_MITIGATION_FULL_FORCE +}; + +extern enum l1tf_mitigations l1tf_mitigation; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 603a6e466852..2409446189e9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -644,7 +644,11 @@ void x86_spec_ctrl_setup_ap(void) #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt +/* Default mitigation for L1TF-affected CPUs */ +enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH; #if IS_ENABLED(CONFIG_KVM_INTEL) +EXPORT_SYMBOL_GPL(l1tf_mitigation); + enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); #endif @@ -656,6 +660,20 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + case L1TF_MITIGATION_FLUSH_NOWARN: + case L1TF_MITIGATION_FLUSH: + break; + case L1TF_MITIGATION_FLUSH_NOSMT: + case L1TF_MITIGATION_FULL: + cpu_smt_disable(false); + break; + case L1TF_MITIGATION_FULL_FORCE: + cpu_smt_disable(true); + break; + } + #if CONFIG_PGTABLE_LEVELS == 2 pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); return; @@ -674,6 +692,32 @@ static void __init l1tf_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); } + +static int __init l1tf_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (!strcmp(str, "flush,nowarn")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOWARN; + else if (!strcmp(str, "flush")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH; + else if (!strcmp(str, "flush,nosmt")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + else if (!strcmp(str, "full")) + l1tf_mitigation = L1TF_MITIGATION_FULL; + else if (!strcmp(str, "full,force")) + l1tf_mitigation = L1TF_MITIGATION_FULL_FORCE; + + return 0; +} +early_param("l1tf", l1tf_cmdline); + #undef pr_fmt #ifdef CONFIG_SYSFS diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c158821bec30..d077869f9228 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -70,9 +70,6 @@ static const struct x86_cpu_id vmx_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); -static bool __read_mostly nosmt; -module_param(nosmt, bool, S_IRUGO); - static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); @@ -218,15 +215,31 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; - /* If set to 'auto' select 'cond' */ - if (l1tf == VMENTER_L1D_FLUSH_AUTO) - l1tf = VMENTER_L1D_FLUSH_COND; - if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; } + /* If set to auto use the default l1tf mitigation method */ + if (l1tf == VMENTER_L1D_FLUSH_AUTO) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + l1tf = VMENTER_L1D_FLUSH_NEVER; + break; + case L1TF_MITIGATION_FLUSH_NOWARN: + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + l1tf = VMENTER_L1D_FLUSH_COND; + break; + case L1TF_MITIGATION_FULL: + case L1TF_MITIGATION_FULL_FORCE: + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + break; + } + } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + } + if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); @@ -10036,16 +10049,33 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(err); } -#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { - if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) { - if (nosmt) { - pr_err(L1TF_MSG); - return -EOPNOTSUPP; + if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + case L1TF_MITIGATION_FLUSH_NOWARN: + /* 'I explicitly don't care' is set */ + break; + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + case L1TF_MITIGATION_FULL: + /* + * Warn upon starting the first VM in a potentially + * insecure environment. + */ + if (cpu_smt_control == CPU_SMT_ENABLED) + pr_warn_once(L1TF_MSG_SMT); + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) + pr_warn_once(L1TF_MSG_L1D); + break; + case L1TF_MITIGATION_FULL_FORCE: + /* Flush is enforced */ + break; } - pr_warn(L1TF_MSG); } return 0; } -- GitLab From a20c88c2a346dd90ce244f73647cd6bd8e1cacb3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:26 +0200 Subject: [PATCH 0727/1291] Documentation: Add section about CPU vulnerabilities commit 3ec8ce5d866ec6a08a9cfab82b62acf4a830b35f upstream Add documentation for the L1TF vulnerability and the mitigation mechanisms: - Explain the problem and risks - Document the mitigation mechanisms - Document the command line controls - Document the sysfs files Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Acked-by: Linus Torvalds Link: https://lkml.kernel.org/r/20180713142323.287429944@linutronix.de Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/index.rst | 9 + Documentation/admin-guide/l1tf.rst | 591 ++++++++++++++++++++++++++++ 2 files changed, 600 insertions(+) create mode 100644 Documentation/admin-guide/l1tf.rst diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 5bb9161dbe6a..78f8f00c369f 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -17,6 +17,15 @@ etc. kernel-parameters devices +This section describes CPU vulnerabilities and provides an overview of the +possible mitigations along with guidance for selecting mitigations if they +are configurable at compile, boot or run time. + +.. toctree:: + :maxdepth: 1 + + l1tf + Here is a set of documents aimed at users who are trying to track down problems and bugs in particular. diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst new file mode 100644 index 000000000000..f2bb28cff439 --- /dev/null +++ b/Documentation/admin-guide/l1tf.rst @@ -0,0 +1,591 @@ +L1TF - L1 Terminal Fault +======================== + +L1 Terminal Fault is a hardware vulnerability which allows unprivileged +speculative access to data which is available in the Level 1 Data Cache +when the page table entry controlling the virtual address, which is used +for the access, has the Present bit cleared or other reserved bits set. + +Affected processors +------------------- + +This vulnerability affects a wide range of Intel processors. The +vulnerability is not present on: + + - Processors from AMD, Centaur and other non Intel vendors + + - Older processor models, where the CPU family is < 6 + + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, + Penwell, Pineview, Slivermont, Airmont, Merrifield) + + - The Intel Core Duo Yonah variants (2006 - 2008) + + - The Intel XEON PHI family + + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected + by the Meltdown vulnerability either. These CPUs should become + available by end of 2018. + +Whether a processor is affected or not can be read out from the L1TF +vulnerability file in sysfs. See :ref:`l1tf_sys_info`. + +Related CVEs +------------ + +The following CVE entries are related to the L1TF vulnerability: + + ============= ================= ============================== + CVE-2018-3615 L1 Terminal Fault SGX related aspects + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects + ============= ================= ============================== + +Problem +------- + +If an instruction accesses a virtual address for which the relevant page +table entry (PTE) has the Present bit cleared or other reserved bits set, +then speculative execution ignores the invalid PTE and loads the referenced +data if it is present in the Level 1 Data Cache, as if the page referenced +by the address bits in the PTE was still present and accessible. + +While this is a purely speculative mechanism and the instruction will raise +a page fault when it is retired eventually, the pure act of loading the +data and making it available to other speculative instructions opens up the +opportunity for side channel attacks to unprivileged malicious code, +similar to the Meltdown attack. + +While Meltdown breaks the user space to kernel space protection, L1TF +allows to attack any physical memory address in the system and the attack +works across all protection domains. It allows an attack of SGX and also +works from inside virtual machines because the speculation bypasses the +extended page table (EPT) protection mechanism. + + +Attack scenarios +---------------- + +1. Malicious user space +^^^^^^^^^^^^^^^^^^^^^^^ + + Operating Systems store arbitrary information in the address bits of a + PTE which is marked non present. This allows a malicious user space + application to attack the physical memory to which these PTEs resolve. + In some cases user-space can maliciously influence the information + encoded in the address bits of the PTE, thus making attacks more + deterministic and more practical. + + The Linux kernel contains a mitigation for this attack vector, PTE + inversion, which is permanently enabled and has no performance + impact. The kernel ensures that the address bits of PTEs, which are not + marked present, never point to cacheable physical memory space. + + A system with an up to date kernel is protected against attacks from + malicious user space applications. + +2. Malicious guest in a virtual machine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The fact that L1TF breaks all domain protections allows malicious guest + OSes, which can control the PTEs directly, and malicious guest user + space applications, which run on an unprotected guest kernel lacking the + PTE inversion mitigation for L1TF, to attack physical host memory. + + A special aspect of L1TF in the context of virtualization is symmetric + multi threading (SMT). The Intel implementation of SMT is called + HyperThreading. The fact that Hyperthreads on the affected processors + share the L1 Data Cache (L1D) is important for this. As the flaw allows + only to attack data which is present in L1D, a malicious guest running + on one Hyperthread can attack the data which is brought into the L1D by + the context which runs on the sibling Hyperthread of the same physical + core. This context can be host OS, host user space or a different guest. + + If the processor does not support Extended Page Tables, the attack is + only possible, when the hypervisor does not sanitize the content of the + effective (shadow) page tables. + + While solutions exist to mitigate these attack vectors fully, these + mitigations are not enabled by default in the Linux kernel because they + can affect performance significantly. The kernel provides several + mechanisms which can be utilized to address the problem depending on the + deployment scenario. The mitigations, their protection scope and impact + are described in the next sections. + + The default mitigations and the rationale for chosing them are explained + at the end of this document. See :ref:`default_mitigations`. + +.. _l1tf_sys_info: + +L1TF system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current L1TF +status of the system: whether the system is vulnerable, and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/l1tf + +The possible values in this file are: + + =========================== =============================== + 'Not affected' The processor is not vulnerable + 'Mitigation: PTE Inversion' The host protection is active + =========================== =============================== + +If KVM/VMX is enabled and the processor is vulnerable then the following +information is appended to the 'Mitigation: PTE Inversion' part: + + - SMT status: + + ===================== ================ + 'VMX: SMT vulnerable' SMT is enabled + 'VMX: SMT disabled' SMT is disabled + ===================== ================ + + - L1D Flush mode: + + ================================ ==================================== + 'L1D vulnerable' L1D flushing is disabled + + 'L1D conditional cache flushes' L1D flush is conditionally enabled + + 'L1D cache flushes' L1D flush is unconditionally enabled + ================================ ==================================== + +The resulting grade of protection is discussed in the following sections. + + +Host mitigation mechanism +------------------------- + +The kernel is unconditionally protected against L1TF attacks from malicious +user space running on the host. + + +Guest mitigation mechanisms +--------------------------- + +.. _l1d_flush: + +1. L1D flush on VMENTER +^^^^^^^^^^^^^^^^^^^^^^^ + + To make sure that a guest cannot attack data which is present in the L1D + the hypervisor flushes the L1D before entering the guest. + + Flushing the L1D evicts not only the data which should not be accessed + by a potentially malicious guest, it also flushes the guest + data. Flushing the L1D has a performance impact as the processor has to + bring the flushed guest data back into the L1D. Depending on the + frequency of VMEXIT/VMENTER and the type of computations in the guest + performance degradation in the range of 1% to 50% has been observed. For + scenarios where guest VMEXIT/VMENTER are rare the performance impact is + minimal. Virtio and mechanisms like posted interrupts are designed to + confine the VMEXITs to a bare minimum, but specific configurations and + application scenarios might still suffer from a high VMEXIT rate. + + The kernel provides two L1D flush modes: + - conditional ('cond') + - unconditional ('always') + + The conditional mode avoids L1D flushing after VMEXITs which execute + only audited code pathes before the corresponding VMENTER. These code + pathes have beed verified that they cannot expose secrets or other + interesting data to an attacker, but they can leak information about the + address space layout of the hypervisor. + + Unconditional mode flushes L1D on all VMENTER invocations and provides + maximum protection. It has a higher overhead than the conditional + mode. The overhead cannot be quantified correctly as it depends on the + work load scenario and the resulting number of VMEXITs. + + The general recommendation is to enable L1D flush on VMENTER. The kernel + defaults to conditional mode on affected processors. + + **Note**, that L1D flush does not prevent the SMT problem because the + sibling thread will also bring back its data into the L1D which makes it + attackable again. + + L1D flush can be controlled by the administrator via the kernel command + line and sysfs control files. See :ref:`mitigation_control_command_line` + and :ref:`mitigation_control_kvm`. + +.. _guest_confinement: + +2. Guest VCPU confinement to dedicated physical cores +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + To address the SMT problem, it is possible to make a guest or a group of + guests affine to one or more physical cores. The proper mechanism for + that is to utilize exclusive cpusets to ensure that no other guest or + host tasks can run on these cores. + + If only a single guest or related guests run on sibling SMT threads on + the same physical core then they can only attack their own memory and + restricted parts of the host memory. + + Host memory is attackable, when one of the sibling SMT threads runs in + host OS (hypervisor) context and the other in guest context. The amount + of valuable information from the host OS context depends on the context + which the host OS executes, i.e. interrupts, soft interrupts and kernel + threads. The amount of valuable data from these contexts cannot be + declared as non-interesting for an attacker without deep inspection of + the code. + + **Note**, that assigning guests to a fixed set of physical cores affects + the ability of the scheduler to do load balancing and might have + negative effects on CPU utilization depending on the hosting + scenario. Disabling SMT might be a viable alternative for particular + scenarios. + + For further information about confining guests to a single or to a group + of cores consult the cpusets documentation: + + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt + +.. _interrupt_isolation: + +3. Interrupt affinity +^^^^^^^^^^^^^^^^^^^^^ + + Interrupts can be made affine to logical CPUs. This is not universally + true because there are types of interrupts which are truly per CPU + interrupts, e.g. the local timer interrupt. Aside of that multi queue + devices affine their interrupts to single CPUs or groups of CPUs per + queue without allowing the administrator to control the affinities. + + Moving the interrupts, which can be affinity controlled, away from CPUs + which run untrusted guests, reduces the attack vector space. + + Whether the interrupts with are affine to CPUs, which run untrusted + guests, provide interesting data for an attacker depends on the system + configuration and the scenarios which run on the system. While for some + of the interrupts it can be assumed that they wont expose interesting + information beyond exposing hints about the host OS memory layout, there + is no way to make general assumptions. + + Interrupt affinity can be controlled by the administrator via the + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is + available at: + + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt + +.. _smt_control: + +4. SMT control +^^^^^^^^^^^^^^ + + To prevent the SMT issues of L1TF it might be necessary to disable SMT + completely. Disabling SMT can have a significant performance impact, but + the impact depends on the hosting scenario and the type of workloads. + The impact of disabling SMT needs also to be weighted against the impact + of other mitigation solutions like confining guests to dedicated cores. + + The kernel provides a sysfs interface to retrieve the status of SMT and + to control it. It also provides a kernel command line interface to + control SMT. + + The kernel command line interface consists of the following options: + + =========== ========================================================== + nosmt Affects the bring up of the secondary CPUs during boot. The + kernel tries to bring all present CPUs online during the + boot process. "nosmt" makes sure that from each physical + core only one - the so called primary (hyper) thread is + activated. Due to a design flaw of Intel processors related + to Machine Check Exceptions the non primary siblings have + to be brought up at least partially and are then shut down + again. "nosmt" can be undone via the sysfs interface. + + nosmt=force Has the same effect as "nosmt' but it does not allow to + undo the SMT disable via the sysfs interface. + =========== ========================================================== + + The sysfs interface provides two files: + + - /sys/devices/system/cpu/smt/control + - /sys/devices/system/cpu/smt/active + + /sys/devices/system/cpu/smt/control: + + This file allows to read out the SMT control state and provides the + ability to disable or (re)enable SMT. The possible states are: + + ============== =================================================== + on SMT is supported by the CPU and enabled. All + logical CPUs can be onlined and offlined without + restrictions. + + off SMT is supported by the CPU and disabled. Only + the so called primary SMT threads can be onlined + and offlined without restrictions. An attempt to + online a non-primary sibling is rejected + + forceoff Same as 'off' but the state cannot be controlled. + Attempts to write to the control file are rejected. + + notsupported The processor does not support SMT. It's therefore + not affected by the SMT implications of L1TF. + Attempts to write to the control file are rejected. + ============== =================================================== + + The possible states which can be written into this file to control SMT + state are: + + - on + - off + - forceoff + + /sys/devices/system/cpu/smt/active: + + This file reports whether SMT is enabled and active, i.e. if on any + physical core two or more sibling threads are online. + + SMT control is also possible at boot time via the l1tf kernel command + line parameter in combination with L1D flush control. See + :ref:`mitigation_control_command_line`. + +5. Disabling EPT +^^^^^^^^^^^^^^^^ + + Disabling EPT for virtual machines provides full mitigation for L1TF even + with SMT enabled, because the effective page tables for guests are + managed and sanitized by the hypervisor. Though disabling EPT has a + significant performance impact especially when the Meltdown mitigation + KPTI is enabled. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. + +There is ongoing research and development for new mitigation mechanisms to +address the performance impact of disabling SMT or EPT. + +.. _mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the L1TF mitigations at boot +time with the option "l1tf=". The valid arguments for this option are: + + ============ ============================================================= + full Provides all available mitigations for the L1TF + vulnerability. Disables SMT and enables all mitigations in + the hypervisors, i.e. unconditional L1D flushing + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + full,force Same as 'full', but disables SMT and L1D flush runtime + control. Implies the 'nosmt=force' command line option. + (i.e. sysfs control of SMT is disabled.) + + flush Leaves SMT enabled and enables the default hypervisor + mitigation, i.e. conditional L1D flushing + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + flush,nosmt Disables SMT and enables the default hypervisor mitigation, + i.e. conditional L1D flushing. + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is + started in a potentially insecure configuration. + + off Disables hypervisor mitigations and doesn't emit any + warnings. + ============ ============================================================= + +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. + + +.. _mitigation_control_kvm: + +Mitigation control for KVM - module parameter +------------------------------------------------------------- + +The KVM hypervisor mitigation mechanism, flushing the L1D cache when +entering a guest, can be controlled with a module parameter. + +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the +following arguments: + + ============ ============================================================== + always L1D cache flush on every VMENTER. + + cond Flush L1D on VMENTER only when the code between VMEXIT and + VMENTER can leak host memory which is considered + interesting for an attacker. This still can leak host memory + which allows e.g. to determine the hosts address space layout. + + never Disables the mitigation + ============ ============================================================== + +The parameter can be provided on the kernel command line, as a module +parameter when loading the modules and at runtime modified via the sysfs +file: + +/sys/module/kvm_intel/parameters/vmentry_l1d_flush + +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush +module parameter is ignored and writes to the sysfs file are rejected. + + +Mitigation selection guide +-------------------------- + +1. No virtualization in use +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The system is protected by the kernel unconditionally and no further + action is required. + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the guest comes from a trusted source and the guest OS kernel is + guaranteed to have the L1TF mitigations in place the system is fully + protected against L1TF and no further action is required. + + To avoid the overhead of the default L1D flushing on VMENTER the + administrator can disable the flushing via the kernel command line and + sysfs control files. See :ref:`mitigation_control_command_line` and + :ref:`mitigation_control_kvm`. + + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +3.1. SMT not supported or disabled +"""""""""""""""""""""""""""""""""" + + If SMT is not supported by the processor or disabled in the BIOS or by + the kernel, it's only required to enforce L1D flushing on VMENTER. + + Conditional L1D flushing is the default behaviour and can be tuned. See + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. + +3.2. EPT not supported or disabled +"""""""""""""""""""""""""""""""""" + + If EPT is not supported by the processor or disabled in the hypervisor, + the system is fully protected. SMT can stay enabled and L1D flushing on + VMENTER is not required. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. + +3.3. SMT and EPT supported and active +""""""""""""""""""""""""""""""""""""" + + If SMT and EPT are supported and active then various degrees of + mitigations can be employed: + + - L1D flushing on VMENTER: + + L1D flushing on VMENTER is the minimal protection requirement, but it + is only potent in combination with other mitigation methods. + + Conditional L1D flushing is the default behaviour and can be tuned. See + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. + + - Guest confinement: + + Confinement of guests to a single or a group of physical cores which + are not running any other processes, can reduce the attack surface + significantly, but interrupts, soft interrupts and kernel threads can + still expose valuable data to a potential attacker. See + :ref:`guest_confinement`. + + - Interrupt isolation: + + Isolating the guest CPUs from interrupts can reduce the attack surface + further, but still allows a malicious guest to explore a limited amount + of host physical memory. This can at least be used to gain knowledge + about the host address space layout. The interrupts which have a fixed + affinity to the CPUs which run the untrusted guests can depending on + the scenario still trigger soft interrupts and schedule kernel threads + which might expose valuable information. See + :ref:`interrupt_isolation`. + +The above three mitigation methods combined can provide protection to a +certain degree, but the risk of the remaining attack surface has to be +carefully analyzed. For full protection the following methods are +available: + + - Disabling SMT: + + Disabling SMT and enforcing the L1D flushing provides the maximum + amount of protection. This mitigation is not depending on any of the + above mitigation methods. + + SMT control and L1D flushing can be tuned by the command line + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run + time with the matching sysfs control files. See :ref:`smt_control`, + :ref:`mitigation_control_command_line` and + :ref:`mitigation_control_kvm`. + + - Disabling EPT: + + Disabling EPT provides the maximum amount of protection as well. It is + not depending on any of the above mitigation methods. SMT can stay + enabled and L1D flushing is not required, but the performance impact is + significant. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' + parameter. + + +.. _default_mitigations: + +Default mitigations +------------------- + + The kernel default mitigations for vulnerable processors are: + + - PTE inversion to protect against malicious user space. This is done + unconditionally and cannot be controlled. + + - L1D conditional flushing on VMENTER when EPT is enabled for + a guest. + + The kernel does not by default enforce the disabling of SMT, which leaves + SMT systems vulnerable when running untrusted guests with EPT enabled. + + The rationale for this choice is: + + - Force disabling SMT can break existing setups, especially with + unattended updates. + + - If regular users run untrusted guests on their machine, then L1TF is + just an add on to other malware which might be embedded in an untrusted + guest, e.g. spam-bots or attacks on the local network. + + There is no technical way to prevent a user from running untrusted code + on their machines blindly. + + - It's technically extremely unlikely and from today's knowledge even + impossible that L1TF can be exploited via the most popular attack + mechanisms like JavaScript because these mechanisms have no way to + control PTEs. If this would be possible and not other mitigation would + be possible, then the default might be different. + + - The administrators of cloud and hosting setups have to carefully + analyze the risk for their scenarios and make the appropriate + mitigation choices, which might even vary across their deployed + machines and also result in other changes of their overall setup. + There is no way for the kernel to provide a sensible default for this + kind of scenarios. -- GitLab From ae217320c17df01283add6f8a549c3fc70580423 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Wed, 18 Jul 2018 19:07:38 +0200 Subject: [PATCH 0728/1291] x86/KVM/VMX: Initialize the vmx_l1d_flush_pages' content commit 288d152c23dcf3c09da46c5c481903ca10ebfef7 upstream The slow path in vmx_l1d_flush() reads from vmx_l1d_flush_pages in order to evict the L1d cache. However, these pages are never cleared and, in theory, their data could be leaked. More importantly, KSM could merge a nested hypervisor's vmx_l1d_flush_pages to fewer than 1 << L1D_CACHE_ORDER host physical pages and this would break the L1d flushing algorithm: L1D on x86_64 is tagged by physical addresses. Fix this by initializing the individual vmx_l1d_flush_pages with a different pattern each. Rename the "empty_zp" asm constraint identifier in vmx_l1d_flush() to "flush_pages" to reflect this change. Fixes: a47dd5f06714 ("x86/KVM/VMX: Add L1D flush algorithm") Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d077869f9228..56c410841298 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -214,6 +214,7 @@ static void *vmx_l1d_flush_pages; static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; + unsigned int i; if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; @@ -246,6 +247,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) if (!page) return -ENOMEM; vmx_l1d_flush_pages = page_address(page); + + /* + * Initialize each page with a different pattern in + * order to protect against KSM in the nested + * virtualization case. + */ + for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { + memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, + PAGE_SIZE); + } } l1tf_vmx_mitigation = l1tf; @@ -9176,7 +9187,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) /* First ensure the pages are in the TLB */ "xorl %%eax, %%eax\n" ".Lpopulate_tlb:\n\t" - "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" "addl $4096, %%eax\n\t" "cmpl %%eax, %[size]\n\t" "jne .Lpopulate_tlb\n\t" @@ -9185,12 +9196,12 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) /* Now fill the cache */ "xorl %%eax, %%eax\n" ".Lfill_cache:\n" - "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" "addl $64, %%eax\n\t" "cmpl %%eax, %[size]\n\t" "jne .Lfill_cache\n\t" "lfence\n" - :: [empty_zp] "r" (vmx_l1d_flush_pages), + :: [flush_pages] "r" (vmx_l1d_flush_pages), [size] "r" (size) : "eax", "ebx", "ecx", "edx"); } -- GitLab From 40b696da70cfcd1d38be557c3bc7d87cfa25dae6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 19 Jul 2018 13:49:58 -0700 Subject: [PATCH 0729/1291] Documentation/l1tf: Fix typos commit 1949f9f49792d65dba2090edddbe36a5f02e3ba3 upstream Fix spelling and other typos Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/l1tf.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst index f2bb28cff439..ccf649cabdcd 100644 --- a/Documentation/admin-guide/l1tf.rst +++ b/Documentation/admin-guide/l1tf.rst @@ -17,7 +17,7 @@ vulnerability is not present on: - Older processor models, where the CPU family is < 6 - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, - Penwell, Pineview, Slivermont, Airmont, Merrifield) + Penwell, Pineview, Silvermont, Airmont, Merrifield) - The Intel Core Duo Yonah variants (2006 - 2008) @@ -113,7 +113,7 @@ Attack scenarios deployment scenario. The mitigations, their protection scope and impact are described in the next sections. - The default mitigations and the rationale for chosing them are explained + The default mitigations and the rationale for choosing them are explained at the end of this document. See :ref:`default_mitigations`. .. _l1tf_sys_info: @@ -191,15 +191,15 @@ Guest mitigation mechanisms - unconditional ('always') The conditional mode avoids L1D flushing after VMEXITs which execute - only audited code pathes before the corresponding VMENTER. These code - pathes have beed verified that they cannot expose secrets or other + only audited code paths before the corresponding VMENTER. These code + paths have been verified that they cannot expose secrets or other interesting data to an attacker, but they can leak information about the address space layout of the hypervisor. Unconditional mode flushes L1D on all VMENTER invocations and provides maximum protection. It has a higher overhead than the conditional mode. The overhead cannot be quantified correctly as it depends on the - work load scenario and the resulting number of VMEXITs. + workload scenario and the resulting number of VMEXITs. The general recommendation is to enable L1D flush on VMENTER. The kernel defaults to conditional mode on affected processors. @@ -262,7 +262,7 @@ Guest mitigation mechanisms Whether the interrupts with are affine to CPUs, which run untrusted guests, provide interesting data for an attacker depends on the system configuration and the scenarios which run on the system. While for some - of the interrupts it can be assumed that they wont expose interesting + of the interrupts it can be assumed that they won't expose interesting information beyond exposing hints about the host OS memory layout, there is no way to make general assumptions. @@ -299,7 +299,7 @@ Guest mitigation mechanisms to be brought up at least partially and are then shut down again. "nosmt" can be undone via the sysfs interface. - nosmt=force Has the same effect as "nosmt' but it does not allow to + nosmt=force Has the same effect as "nosmt" but it does not allow to undo the SMT disable via the sysfs interface. =========== ========================================================== -- GitLab From c6613521abbc2685ef334af32e455589d14f1d8d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 25 Jul 2018 10:36:45 +0200 Subject: [PATCH 0730/1291] cpu/hotplug: detect SMT disabled by BIOS commit 73d5e2b472640b1fcdb61ae8be389912ef211bda upstream If SMT is disabled in BIOS, the CPU code doesn't properly detect it. The /sys/devices/system/cpu/smt/control file shows 'on', and the 'l1tf' vulnerabilities file shows SMT as vulnerable. Fix it by forcing 'cpu_smt_control' to CPU_SMT_NOT_SUPPORTED in such a case. Unfortunately the detection can only be done after bringing all the CPUs online, so we have to overwrite any previous writes to the variable. Reported-by: Joe Mario Tested-by: Jiri Kosina Fixes: f048c399e0f7 ("x86/topology: Provide topology_smt_supported()") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 877437989a28..5787d5e043da 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2137,6 +2137,15 @@ static const struct attribute_group cpuhp_smt_attr_group = { static int __init cpu_smt_state_init(void) { + /* + * If SMT was disabled by BIOS, detect it here, after the CPUs have + * been brought online. This ensures the smt/l1tf sysfs entries are + * consistent with reality. Note this may overwrite cpu_smt_control's + * previous setting. + */ + if (topology_max_smt_threads() == 1) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; + return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } -- GitLab From c6a43c04233bdf63104b3c7ef6984cdaa3aa9ecd Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sat, 21 Jul 2018 22:16:56 +0200 Subject: [PATCH 0731/1291] x86/KVM/VMX: Don't set l1tf_flush_l1d to true from vmx_l1d_flush() commit 379fd0c7e6a391e5565336a646f19f218fb98c6c upstream vmx_l1d_flush() gets invoked only if l1tf_flush_l1d is true. There's no point in setting l1tf_flush_l1d to true from there again. Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 56c410841298..c11b7e1a9eb0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9166,15 +9166,15 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) /* * This code is only executed when the the flush mode is 'cond' or * 'always' - * - * If 'flush always', keep the flush bit set, otherwise clear - * it. The flush bit gets set again either from vcpu_run() or from - * one of the unsafe VMEXIT handlers. */ - if (static_branch_unlikely(&vmx_l1d_flush_always)) - vcpu->arch.l1tf_flush_l1d = true; - else + if (!static_branch_unlikely(&vmx_l1d_flush_always)) { + /* + * Clear the flush bit, it gets set again either from + * vcpu_run() or from one of the unsafe VMEXIT + * handlers. + */ vcpu->arch.l1tf_flush_l1d = false; + } vcpu->stat.l1d_flush++; -- GitLab From 9baeea57aff241b5b454db99373479dae157a30f Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sat, 21 Jul 2018 22:25:00 +0200 Subject: [PATCH 0732/1291] x86/KVM/VMX: Replace 'vmx_l1d_flush_always' with 'vmx_l1d_flush_cond' commit 427362a142441f08051369db6fbe7f61c73b3dca upstream The vmx_l1d_flush_always static key is only ever evaluated if vmx_l1d_should_flush is enabled. In that case however, there are only two L1d flushing modes possible: "always" and "conditional". The "conditional" mode's implementation tends to require more sophisticated logic than the "always" mode. Avoid inverted logic by replacing the 'vmx_l1d_flush_always' static key with a 'vmx_l1d_flush_cond' one. There is no change in functionality. Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c11b7e1a9eb0..785d8169ecaa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -192,7 +192,7 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_always); +static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); /* Storage for pre module init parameter parsing */ @@ -266,10 +266,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) else static_branch_disable(&vmx_l1d_should_flush); - if (l1tf == VMENTER_L1D_FLUSH_ALWAYS) - static_branch_enable(&vmx_l1d_flush_always); + if (l1tf == VMENTER_L1D_FLUSH_COND) + static_branch_enable(&vmx_l1d_flush_cond); else - static_branch_disable(&vmx_l1d_flush_always); + static_branch_disable(&vmx_l1d_flush_cond); return 0; } @@ -9167,7 +9167,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) * This code is only executed when the the flush mode is 'cond' or * 'always' */ - if (!static_branch_unlikely(&vmx_l1d_flush_always)) { + if (static_branch_likely(&vmx_l1d_flush_cond)) { /* * Clear the flush bit, it gets set again either from * vcpu_run() or from one of the unsafe VMEXIT -- GitLab From 0d6b3085975fafd909d26d37310c641f8e5e1be9 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sat, 21 Jul 2018 22:35:28 +0200 Subject: [PATCH 0733/1291] x86/KVM/VMX: Move the l1tf_flush_l1d test to vmx_l1d_flush() commit 5b6ccc6c3b1a477fbac9ec97a0b4c1c48e765209 upstream Currently, vmx_vcpu_run() checks if l1tf_flush_l1d is set and invokes vmx_l1d_flush() if so. This test is unncessary for the "always flush L1D" mode. Move the check to vmx_l1d_flush()'s conditional mode code path. Notes: - vmx_l1d_flush() is likely to get inlined anyway and thus, there's no extra function call. - This inverts the (static) branch prediction, but there hadn't been any explicit likely()/unlikely() annotations before and so it stays as is. Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 785d8169ecaa..60ffeb3c60ee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9168,12 +9168,16 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) * 'always' */ if (static_branch_likely(&vmx_l1d_flush_cond)) { + bool flush_l1d = vcpu->arch.l1tf_flush_l1d; + /* * Clear the flush bit, it gets set again either from * vcpu_run() or from one of the unsafe VMEXIT * handlers. */ vcpu->arch.l1tf_flush_l1d = false; + if (!flush_l1d) + return; } vcpu->stat.l1d_flush++; @@ -9703,10 +9707,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->__launched = vmx->loaded_vmcs->launched; - if (static_branch_unlikely(&vmx_l1d_should_flush)) { - if (vcpu->arch.l1tf_flush_l1d) - vmx_l1d_flush(vcpu); - } + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); asm( /* Store host registers */ -- GitLab From 88f8090b9cbec23fac99eb4d7b4ea785c3beac51 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Fri, 27 Jul 2018 12:46:29 +0200 Subject: [PATCH 0734/1291] x86/irq: Demote irq_cpustat_t::__softirq_pending to u16 commit 9aee5f8a7e30330d0a8f4c626dc924ca5590aba5 upstream An upcoming patch will extend KVM's L1TF mitigation in conditional mode to also cover interrupts after VMEXITs. For tracking those, stores to a new per-cpu flag from interrupt handlers will become necessary. In order to improve cache locality, this new flag will be added to x86's irq_cpustat_t. Make some space available there by shrinking the ->softirq_pending bitfield from 32 to 16 bits: the number of bits actually used is only NR_SOFTIRQS, i.e. 10. Suggested-by: Paolo Bonzini Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/hardirq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 51cc979dd364..2ff9d68361ba 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -6,7 +6,7 @@ #include typedef struct { - unsigned int __softirq_pending; + u16 __softirq_pending; unsigned int __nmi_count; /* arch dependent */ #ifdef CONFIG_X86_LOCAL_APIC unsigned int apic_timer_irqs; /* arch dependent */ -- GitLab From 06fd9ef44f7c642710dbe482d342c22ee2e1cd13 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Fri, 27 Jul 2018 13:22:16 +0200 Subject: [PATCH 0735/1291] x86/KVM/VMX: Introduce per-host-cpu analogue of l1tf_flush_l1d commit 45b575c00d8e72d69d75dd8c112f044b7b01b069 upstream Part of the L1TF mitigation for vmx includes flushing the L1D cache upon VMENTRY. L1D flushes are costly and two modes of operations are provided to users: "always" and the more selective "conditional" mode. If operating in the latter, the cache would get flushed only if a host side code path considered unconfined had been traversed. "Unconfined" in this context means that it might have pulled in sensitive data like user data or kernel crypto keys. The need for L1D flushes is tracked by means of the per-vcpu flag l1tf_flush_l1d. KVM exit handlers considered unconfined set it. A vmx_l1d_flush() subsequently invoked before the next VMENTER will conduct a L1d flush based on its value and reset that flag again. Currently, interrupts delivered "normally" while in root operation between VMEXIT and VMENTER are not taken into account. Part of the reason is that these don't leave any traces and thus, the vmx code is unable to tell if any such has happened. As proposed by Paolo Bonzini, prepare for tracking all interrupts by introducing a new per-cpu flag, "kvm_cpu_l1tf_flush_l1d". It will be in strong analogy to the per-vcpu ->l1tf_flush_l1d. A later patch will make interrupt handlers set it. For the sake of cache locality, group kvm_cpu_l1tf_flush_l1d into x86' per-cpu irq_cpustat_t as suggested by Peter Zijlstra. Provide the helpers kvm_set_cpu_l1tf_flush_l1d(), kvm_clear_cpu_l1tf_flush_l1d() and kvm_get_cpu_l1tf_flush_l1d(). Make them trivial resp. non-existent for !CONFIG_KVM_INTEL as appropriate. Let vmx_l1d_flush() handle kvm_cpu_l1tf_flush_l1d in the same way as l1tf_flush_l1d. Suggested-by: Paolo Bonzini Suggested-by: Peter Zijlstra Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/hardirq.h | 23 +++++++++++++++++++++++ arch/x86/kvm/vmx.c | 17 +++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 2ff9d68361ba..261ef5d8c181 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -7,6 +7,9 @@ typedef struct { u16 __softirq_pending; +#if IS_ENABLED(CONFIG_KVM_INTEL) + u8 kvm_cpu_l1tf_flush_l1d; +#endif unsigned int __nmi_count; /* arch dependent */ #ifdef CONFIG_X86_LOCAL_APIC unsigned int apic_timer_irqs; /* arch dependent */ @@ -62,4 +65,24 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#if IS_ENABLED(CONFIG_KVM_INTEL) +static inline void kvm_set_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); +} + +static inline void kvm_clear_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); +} + +static inline bool kvm_get_cpu_l1tf_flush_l1d(void) +{ + return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); +} +#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ +static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ + #endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 60ffeb3c60ee..54fef40254d1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9168,14 +9168,23 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) * 'always' */ if (static_branch_likely(&vmx_l1d_flush_cond)) { - bool flush_l1d = vcpu->arch.l1tf_flush_l1d; + bool flush_l1d; /* - * Clear the flush bit, it gets set again either from - * vcpu_run() or from one of the unsafe VMEXIT - * handlers. + * Clear the per-vcpu flush bit, it gets set again + * either from vcpu_run() or from one of the unsafe + * VMEXIT handlers. */ + flush_l1d = vcpu->arch.l1tf_flush_l1d; vcpu->arch.l1tf_flush_l1d = false; + + /* + * Clear the per-cpu flush bit, it gets set again from + * the interrupt handlers. + */ + flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); + kvm_clear_cpu_l1tf_flush_l1d(); + if (!flush_l1d) return; } -- GitLab From 18f891ef7a631cac76e25d09a37f582690d852dd Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 29 Jul 2018 12:15:33 +0200 Subject: [PATCH 0736/1291] x86: Don't include linux/irq.h from asm/hardirq.h commit 447ae316670230d7d29430e2cbf1f5db4f49d14c upstream The next patch in this series will have to make the definition of irq_cpustat_t available to entering_irq(). Inclusion of asm/hardirq.h into asm/apic.h would cause circular header dependencies like asm/smp.h asm/apic.h asm/hardirq.h linux/irq.h linux/topology.h linux/smp.h asm/smp.h or linux/gfp.h linux/mmzone.h asm/mmzone.h asm/mmzone_64.h asm/smp.h asm/apic.h asm/hardirq.h linux/irq.h linux/irqdesc.h linux/kobject.h linux/sysfs.h linux/kernfs.h linux/idr.h linux/gfp.h and others. This causes compilation errors because of the header guards becoming effective in the second inclusion: symbols/macros that had been defined before wouldn't be available to intermediate headers in the #include chain anymore. A possible workaround would be to move the definition of irq_cpustat_t into its own header and include that from both, asm/hardirq.h and asm/apic.h. However, this wouldn't solve the real problem, namely asm/harirq.h unnecessarily pulling in all the linux/irq.h cruft: nothing in asm/hardirq.h itself requires it. Also, note that there are some other archs, like e.g. arm64, which don't have that #include in their asm/hardirq.h. Remove the linux/irq.h #include from x86' asm/hardirq.h. Fix resulting compilation errors by adding appropriate #includes to *.c files as needed. Note that some of these *.c files could be cleaned up a bit wrt. to their set of #includes, but that should better be done from separate patches, if at all. Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/dmi.h | 2 +- arch/x86/include/asm/hardirq.h | 1 - arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kernel/apic/apic.c | 2 ++ arch/x86/kernel/apic/htirq.c | 2 ++ arch/x86/kernel/apic/io_apic.c | 1 + arch/x86/kernel/apic/msi.c | 1 + arch/x86/kernel/apic/vector.c | 1 + arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/ftrace.c | 1 + arch/x86/kernel/hpet.c | 1 + arch/x86/kernel/i8259.c | 1 + arch/x86/kernel/idt.c | 1 + arch/x86/kernel/irq.c | 1 + arch/x86/kernel/irq_32.c | 1 + arch/x86/kernel/irq_64.c | 1 + arch/x86/kernel/irqinit.c | 1 + arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/time.c | 1 + arch/x86/mm/fault.c | 1 + arch/x86/mm/pti.c | 1 + arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 1 + arch/x86/xen/enlighten.c | 1 + drivers/gpu/drm/i915/intel_lpe_audio.c | 1 + drivers/pci/host/pci-hyperv.c | 2 ++ 26 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 0ab2ab27ad1f..b825cb201251 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -4,8 +4,8 @@ #include #include +#include -#include #include static __always_inline __init void *dmi_alloc(unsigned len) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 261ef5d8c181..486c843273c4 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,7 +3,6 @@ #define _ASM_X86_HARDIRQ_H #include -#include typedef struct { u16 __softirq_pending; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a41d58822070..c835d4f59112 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a10d3ed77ce..348fb896b5b4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include unsigned int num_processors; diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 56ccf9346b08..741de281ed5d 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3b89b27945ff..96a8a68f9c79 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be764422..f10e7f93b0e2 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2ce1c708b8ee..b958082c74a7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index f92a6593de1e..2ea85b32421a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01ebcb6f263e..7acb87cb2da8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 8ce4212e2b8d..afa1a204bc6d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8f5cb2c7060c..02abc134367f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 0c5256653d6c..38c3d5790970 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -8,6 +8,7 @@ #include #include #include +#include struct idt_data { unsigned int vector; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index aa9d51eea9d0..3c2326b59820 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c1bdbd3d3232..95600a99ae93 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index d86e344f5b3d..0469cd078db1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1e4094eba15e..40f83d0d7b8a 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 1e7709337827..65452d555f05 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "common.h" diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 60dc95d7d96d..6fd12e0a07c9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -78,6 +78,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 879af864d99a..49a5c394f3ed 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0133d26f16be..c2faff548f59 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -24,6 +24,7 @@ #include /* emulate_vsyscall */ #include /* struct vm86 */ #include /* vma_pkey() */ +#include #define CREATE_TRACE_POINTS #include diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ce38f165489b..d6f11accd37a 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -45,6 +45,7 @@ #include #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 4f5fa65a1011..2acd6be13375 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -18,6 +18,7 @@ #include #include #include +#include #define TANGIER_EXT_TIMER0_MSI 12 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c9081c6671f0..df208af3cd74 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -3,6 +3,7 @@ #endif #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..2fdf302ebdad 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -62,6 +62,7 @@ #include #include +#include #include #include diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 4523d7e1bcb9..ffc87a956d97 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -53,6 +53,8 @@ #include #include #include +#include + #include #include #include -- GitLab From aef13e1e96b7449f8346392ad1e38db86e6e75a2 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 29 Jul 2018 13:06:04 +0200 Subject: [PATCH 0737/1291] x86/irq: Let interrupt handlers set kvm_cpu_l1tf_flush_l1d commit ffcba43ff66c7dab34ec700debd491d2a4d319b4 upstream The last missing piece to having vmx_l1d_flush() take interrupts after VMEXIT into account is to set the kvm_cpu_l1tf_flush_l1d per-cpu flag on irq entry. Issue calls to kvm_set_cpu_l1tf_flush_l1d() from entering_irq(), ipi_entering_ack_irq(), smp_reschedule_interrupt() and uv_bau_message_interrupt(). Suggested-by: Paolo Bonzini Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/kernel/smp.c | 1 + arch/x86/platform/uv/tlb_uv.c | 1 + 3 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 77ffd3b7894f..a1ed92aae12a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -10,6 +10,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -626,6 +627,7 @@ extern void irq_exit(void); static inline void entering_irq(void) { irq_enter(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void entering_ack_irq(void) @@ -638,6 +640,7 @@ static inline void ipi_entering_ack_irq(void) { irq_enter(); ack_APIC_irq(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void exiting_irq(void) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 5c574dff4c1a..04adc8d60aed 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -261,6 +261,7 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + kvm_set_cpu_l1tf_flush_l1d(); if (trace_resched_ipi_enabled()) { /* diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 0b530c53de1f..34f9a9ce6236 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1285,6 +1285,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) struct msg_desc msgdesc; ack_APIC_irq(); + kvm_set_cpu_l1tf_flush_l1d(); time_start = get_cycles(); bcp = &per_cpu(bau_control, smp_processor_id()); -- GitLab From f9625775c30945439e412413de813404046e6f97 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 22 Jul 2018 13:38:18 +0200 Subject: [PATCH 0738/1291] x86/KVM/VMX: Don't set l1tf_flush_l1d from vmx_handle_external_intr() commit 18b57ce2eb8c8b9a24174a89250cf5f57c76ecdc upstream For VMEXITs caused by external interrupts, vmx_handle_external_intr() indirectly calls into the interrupt handlers through the host's IDT. It follows that these interrupts get accounted for in the kvm_cpu_l1tf_flush_l1d per-cpu flag. The subsequently executed vmx_l1d_flush() will thus be aware that some interrupts have happened and conduct a L1d flush anyway. Setting l1tf_flush_l1d from vmx_handle_external_intr() isn't needed anymore. Drop it. Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 54fef40254d1..c9b957d6e313 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9460,7 +9460,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); - vcpu->arch.l1tf_flush_l1d = true; } } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -- GitLab From dc6c443e175bebad177c3d81d9c16bf7002cb4ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Aug 2018 17:06:12 +0200 Subject: [PATCH 0739/1291] Documentation/l1tf: Remove Yonah processors from not vulnerable list commit 58331136136935c631c2b5f06daf4c3006416e91 upstream Dave reported, that it's not confirmed that Yonah processors are unaffected. Remove them from the list. Reported-by: ave Hansen Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/l1tf.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst index ccf649cabdcd..5dadb4503ec9 100644 --- a/Documentation/admin-guide/l1tf.rst +++ b/Documentation/admin-guide/l1tf.rst @@ -19,8 +19,6 @@ vulnerability is not present on: - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, Penwell, Pineview, Silvermont, Airmont, Merrifield) - - The Intel Core Duo Yonah variants (2006 - 2008) - - The Intel XEON PHI family - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the -- GitLab From f0660d587efed81cfe3d734d77e3fb03fce0b15b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 21 Feb 2018 13:39:51 -0600 Subject: [PATCH 0740/1291] KVM: x86: Add a framework for supporting MSR-based features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 801e459a6f3a63af9d447e6249088c76ae16efc4 upstream Provide a new KVM capability that allows bits within MSRs to be recognized as features. Two new ioctls are added to the /dev/kvm ioctl routine to retrieve the list of these MSRs and then retrieve their values. A kvm_x86_ops callback is used to determine support for the listed MSR-based features. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini [Tweaked documentation. - Radim] Signed-off-by: Radim Krčmář Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/virtual/kvm/api.txt | 40 ++++++++++++----- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/svm.c | 6 +++ arch/x86/kvm/vmx.c | 6 +++ arch/x86/kvm/x86.c | 75 ++++++++++++++++++++++++++++--- include/uapi/linux/kvm.h | 2 + 6 files changed, 114 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 88ad78c6f605..5d12166bd66b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -123,14 +123,15 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the flag KVM_VM_MIPS_VZ. -4.3 KVM_GET_MSR_INDEX_LIST +4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST -Capability: basic +Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST Architectures: x86 -Type: system +Type: system ioctl Parameters: struct kvm_msr_list (in/out) Returns: 0 on success; -1 on error Errors: + EFAULT: the msr index list cannot be read from or written to E2BIG: the msr index list is to be to fit in the array specified by the user. @@ -139,16 +140,23 @@ struct kvm_msr_list { __u32 indices[0]; }; -This ioctl returns the guest msrs that are supported. The list varies -by kvm version and host processor, but does not change otherwise. The -user fills in the size of the indices array in nmsrs, and in return -kvm adjusts nmsrs to reflect the actual number of msrs and fills in -the indices array with their numbers. +The user fills in the size of the indices array in nmsrs, and in return +kvm adjusts nmsrs to reflect the actual number of msrs and fills in the +indices array with their numbers. + +KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list +varies by kvm version and host processor, but does not change otherwise. Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are not returned in the MSR list, as different vcpus can have a different number of banks, as set via the KVM_X86_SETUP_MCE ioctl. +KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed +to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities +and processor features that are exposed via MSRs (e.g., VMX capabilities). +This list also varies by kvm version and host processor, but does not change +otherwise. + 4.4 KVM_CHECK_EXTENSION @@ -475,14 +483,22 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. 4.18 KVM_GET_MSRS -Capability: basic +Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system) Architectures: x86 -Type: vcpu ioctl +Type: system ioctl, vcpu ioctl Parameters: struct kvm_msrs (in/out) -Returns: 0 on success, -1 on error +Returns: number of msrs successfully returned; + -1 on error + +When used as a system ioctl: +Reads the values of MSR-based features that are available for the VM. This +is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values. +The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST +in a system ioctl. +When used as a vcpu ioctl: Reads model-specific registers from the vcpu. Supported msr indices can -be obtained using KVM_GET_MSR_INDEX_LIST. +be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl. struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c835d4f59112..017dbbde399e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1066,6 +1066,8 @@ struct kvm_x86_ops { void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cfa155078ebb..e3ee1c61fc92 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3555,6 +3555,11 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5588,6 +5593,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c9b957d6e313..fe583f044c0e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3425,6 +3425,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return !(val & ~valid_bits); } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -12505,6 +12510,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_put = vmx_vcpu_put, .update_bp_intercept = update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b23e8ef3cb71..792fa37d5dcd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1042,6 +1042,28 @@ static u32 emulated_msrs[] = { static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { +}; + +static unsigned int num_msr_based_features; + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + struct kvm_msr_entry msr; + + msr.index = index; + if (kvm_x86_ops->get_msr_feature(&msr)) + return 1; + + *data = msr.data; + + return 0; +} + bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) @@ -2601,13 +2623,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i, idx; + int i; - idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - srcu_read_unlock(&vcpu->kvm->srcu, idx); return i; } @@ -2706,6 +2726,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_GET_MSR_FEATURES: r = 1; break; case KVM_CAP_ADJUST_CLOCK: @@ -2820,6 +2841,31 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = 0; break; + case KVM_GET_MSR_FEATURE_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = argp; + struct kvm_msr_list msr_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = num_msr_based_features; + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) + goto out; + r = -E2BIG; + if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msr_based_features, + num_msr_based_features * sizeof(u32))) + goto out; + r = 0; + break; + } + case KVM_GET_MSRS: + r = msr_io(NULL, argp, do_get_msr_feature, 1); + break; } default: r = -EINVAL; @@ -3554,12 +3600,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: + case KVM_GET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_get_msr, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; - case KVM_SET_MSRS: + } + case KVM_SET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_set_msr, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; + } case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -4334,6 +4386,19 @@ static void kvm_init_msr_list(void) j++; } num_emulated_msrs = j; + + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + struct kvm_msr_entry msr; + + msr.index = msr_based_features[i]; + if (kvm_x86_ops->get_msr_feature(&msr)) + continue; + + if (j < i) + msr_based_features[j] = msr_based_features[i]; + j++; + } + num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 857bad91c454..27c62abb6c9e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt { #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) +#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) /* * Extension capability list. @@ -932,6 +933,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_BPB 152 +#define KVM_CAP_GET_MSR_FEATURES 153 #ifdef KVM_CAP_IRQ_ROUTING -- GitLab From 52613b7abbd8b123130d84cbe28830b843036388 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 24 Feb 2018 00:18:20 +0100 Subject: [PATCH 0741/1291] KVM: SVM: Add MSR-based feature support for serializing LFENCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d1d93fa90f1afa926cb060b7f78ab01a65705b4d upstream In order to determine if LFENCE is a serializing instruction on AMD processors, MSR 0xc0011029 (MSR_F10H_DECFG) must be read and the state of bit 1 checked. This patch will add support to allow a guest to properly make this determination. Add the MSR feature callback operation to svm.c and add MSR 0xc0011029 to the list of MSR-based features. If LFENCE is serializing, then the feature is supported, allowing the hypervisor to set the value of the MSR that guest will see. Support is also added to write (hypervisor only) and read the MSR value for the guest. A write by the guest will result in a #GP. A read by the guest will return the value as set by the host. In this way, the support to expose the feature to the guest is controlled by the hypervisor. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e3ee1c61fc92..48236bf8a5b2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -175,6 +175,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -3557,7 +3559,18 @@ static int cr8_write_interception(struct vcpu_svm *svm) static int svm_get_msr_feature(struct kvm_msr_entry *msr) { - return 1; + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3662,6 +3675,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -3850,6 +3866,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 792fa37d5dcd..9c1a9568c78d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1047,6 +1047,7 @@ static unsigned num_emulated_msrs; * can be used by a hypervisor to validate requested CPU features. */ static u32 msr_based_features[] = { + MSR_F10H_DECFG, }; static unsigned int num_msr_based_features; -- GitLab From 6181db2838d6d54708e79496b63df9c447691439 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:30 +0800 Subject: [PATCH 0742/1291] KVM: X86: Introduce kvm_get_msr_feature() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 66421c1ec340096b291af763ed5721314cdd9c5c upstream Introduce kvm_get_msr_feature() to handle the msrs which are supported by different vendors and sharing the same emulation logic. Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c1a9568c78d..0c7e44891dc8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1052,13 +1052,25 @@ static u32 msr_based_features[] = { static unsigned int num_msr_based_features; +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + default: + if (kvm_x86_ops->get_msr_feature(msr)) + return 1; + } + return 0; +} + static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { struct kvm_msr_entry msr; + int r; msr.index = index; - if (kvm_x86_ops->get_msr_feature(&msr)) - return 1; + r = kvm_get_msr_feature(&msr); + if (r) + return r; *data = msr.data; @@ -4392,7 +4404,7 @@ static void kvm_init_msr_list(void) struct kvm_msr_entry msr; msr.index = msr_based_features[i]; - if (kvm_x86_ops->get_msr_feature(&msr)) + if (kvm_get_msr_feature(&msr)) continue; if (j < i) -- GitLab From 8c9900543e2698ce2f374af712b8fbd2007fddbf Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:31 +0800 Subject: [PATCH 0743/1291] KVM: X86: Allow userspace to define the microcode version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 518e7b94817abed94becfe6a44f1ece0d4745afe upstream Linux (among the others) has checks to make sure that certain features aren't enabled on a certain family/model/stepping if the microcode version isn't greater than or equal to a known good version. By exposing the real microcode version, we're preventing buggy guests that don't check that they are running virtualized (i.e., they should trust the hypervisor) from disabling features that are effectively not buggy. Suggested-by: Filippo Sironi Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 1 + arch/x86/kvm/x86.c | 11 +++++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 017dbbde399e..d6ff1b132b47 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -507,6 +507,7 @@ struct kvm_vcpu_arch { u64 smbase; bool tpr_access_reporting; u64 ia32_xss; + u64 microcode_version; /* * Paging state of the vcpu diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 48236bf8a5b2..282bbcbf3b6a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1618,6 +1618,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; @@ -3655,9 +3656,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->virt_spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fe583f044c0e..bbaa5421c237 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5934,6 +5934,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c7e44891dc8..b68be7539b58 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1048,6 +1048,7 @@ static unsigned num_emulated_msrs; */ static u32 msr_based_features[] = { MSR_F10H_DECFG, + MSR_IA32_UCODE_REV, }; static unsigned int num_msr_based_features; @@ -1055,6 +1056,9 @@ static unsigned int num_msr_based_features; static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { + case MSR_IA32_UCODE_REV: + rdmsrl(msr->index, msr->data); + break; default: if (kvm_x86_ops->get_msr_feature(msr)) return 1; @@ -2192,7 +2196,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_AMD64_NB_CFG: - case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: @@ -2200,6 +2203,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: break; + case MSR_IA32_UCODE_REV: + if (msr_info->host_initiated) + vcpu->arch.microcode_version = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2486,7 +2493,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0; break; case MSR_IA32_UCODE_REV: - msr_info->data = 0x100000000ULL; + msr_info->data = vcpu->arch.microcode_version; break; case MSR_MTRRcap: case 0x200 ... 0x2ff: -- GitLab From ab12e37d683a9ede978df8f0e0bf414e925597ed Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Jun 2018 14:04:37 +0200 Subject: [PATCH 0744/1291] KVM: VMX: support MSR_IA32_ARCH_CAPABILITIES as a feature MSR commit cd28325249a1ca0d771557ce823e0308ad629f98 upstream This lets userspace read the MSR_IA32_ARCH_CAPABILITIES and check that all requested features are available on the host. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b68be7539b58..fcc058edfaba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1049,6 +1049,7 @@ static unsigned num_emulated_msrs; static u32 msr_based_features[] = { MSR_F10H_DECFG, MSR_IA32_UCODE_REV, + MSR_IA32_ARCH_CAPABILITIES, }; static unsigned int num_msr_based_features; @@ -1057,7 +1058,8 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { case MSR_IA32_UCODE_REV: - rdmsrl(msr->index, msr->data); + case MSR_IA32_ARCH_CAPABILITIES: + rdmsrl_safe(msr->index, &msr->data); break; default: if (kvm_x86_ops->get_msr_feature(msr)) -- GitLab From 6a0bea042dcacc087587e6b44fb3e74ce0c26e5c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 5 Aug 2018 16:07:45 +0200 Subject: [PATCH 0745/1291] x86/speculation: Simplify sysfs report of VMX L1TF vulnerability commit ea156d192f5257a5bf393d33910d3b481bf8a401 upstream Three changes to the content of the sysfs file: - If EPT is disabled, L1TF cannot be exploited even across threads on the same core, and SMT is irrelevant. - If mitigation is completely disabled, and SMT is enabled, print "vulnerable" instead of "vulnerable, SMT vulnerable" - Reorder the two parts so that the main vulnerability state comes first and the detail on SMT is second. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2409446189e9..6e6cd933d56c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -738,9 +738,15 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); - return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG, - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled", - l1tf_vmx_states[l1tf_vmx_mitigation]); + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || + (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && + cpu_smt_control == CPU_SMT_ENABLED)) + return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); + + return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) -- GitLab From c15396d3f74f6cd7c084bdcc1004714a40cb268a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 5 Aug 2018 16:07:46 +0200 Subject: [PATCH 0746/1291] x86/speculation: Use ARCH_CAPABILITIES to skip L1D flush on vmentry commit 8e0b2b916662e09dd4d09e5271cdf214c6b80e62 upstream Bit 3 of ARCH_CAPABILITIES tells a hypervisor that L1D flush on vmentry is not needed. Add a new value to enum vmx_l1d_flush_state, which is used either if there is no L1TF bug at all, or if bit 3 is set in ARCH_CAPABILITIES. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/kvm/vmx.c | 10 ++++++++++ 4 files changed, 13 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2cccec4ab1c9..ef7eec669a1b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -70,6 +70,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass * attack, so no Speculative Store Bypass diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 7b0492817169..08c14aec26ac 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -577,6 +577,7 @@ enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_COND, VMENTER_L1D_FLUSH_ALWAYS, VMENTER_L1D_FLUSH_EPT_DISABLED, + VMENTER_L1D_FLUSH_NOT_REQUIRED, }; extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6e6cd933d56c..64a72b4a780a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -731,6 +731,7 @@ static const char *l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", [VMENTER_L1D_FLUSH_EPT_DISABLED] = "EPT disabled", + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = "flush not necessary" }; static ssize_t l1tf_show_state(char *buf) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bbaa5421c237..63ec03108b5d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -221,6 +221,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) return 0; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { + u64 msr; + + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + } + /* If set to auto use the default l1tf mitigation method */ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { switch (l1tf_mitigation) { -- GitLab From 1110cb2a343f506287d46e0c614512afaa7c7906 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 5 Aug 2018 16:07:47 +0200 Subject: [PATCH 0747/1291] KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry commit 5b76a3cff011df2dcb6186c965a2e4d809a05ad4 upstream When nested virtualization is in use, VMENTER operations from the nested hypervisor into the nested guest will always be processed by the bare metal hypervisor, and KVM's "conditional cache flushes" mode in particular does a flush on nested vmentry. Therefore, include the "skip L1D flush on vmentry" bit in KVM's suggested ARCH_CAPABILITIES setting. Add the relevant Documentation. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/l1tf.rst | 21 +++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 3 +-- arch/x86/kvm/x86.c | 26 +++++++++++++++++++++++++- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst index 5dadb4503ec9..bae52b845de0 100644 --- a/Documentation/admin-guide/l1tf.rst +++ b/Documentation/admin-guide/l1tf.rst @@ -546,6 +546,27 @@ available: EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. +3.4. Nested virtual machines +"""""""""""""""""""""""""""" + +When nested virtualization is in use, three operating systems are involved: +the bare metal hypervisor, the nested hypervisor and the nested virtual +machine. VMENTER operations from the nested hypervisor into the nested +guest will always be processed by the bare metal hypervisor. If KVM is the +bare metal hypervisor it wiil: + + - Flush the L1D cache on every switch from the nested hypervisor to the + nested virtual machine, so that the nested hypervisor's secrets are not + exposed to the nested virtual machine; + + - Flush the L1D cache on every switch from the nested virtual machine to + the nested hypervisor; this is a complex operation, and flushing the L1D + cache avoids that the bare metal hypervisor's secrets are exposed to the + nested virtual machine; + + - Instruct the nested hypervisor to not perform any L1D cache flush. This + is an optimization to avoid double L1D flushing. + .. _default_mitigations: diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d6ff1b132b47..4015b88383ce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1374,6 +1374,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +u64 kvm_get_arch_capabilities(void); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 63ec03108b5d..f015ca3997d9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5910,8 +5910,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); + vmx->arch_capabilities = kvm_get_arch_capabilities(); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcc058edfaba..5c2c09f6c1c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1054,11 +1054,35 @@ static u32 msr_based_features[] = { static unsigned int num_msr_based_features; +u64 kvm_get_arch_capabilities(void) +{ + u64 data; + + rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + + /* + * If we're doing cache flushes (either "always" or "cond") + * we will do one whenever the guest does a vmlaunch/vmresume. + * If an outer hypervisor is doing the cache flush for us + * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that + * capability to the guest too, and if EPT is disabled we're not + * vulnerable. Overall, only VMENTER_L1D_FLUSH_NEVER will + * require a nested hypervisor to do a flush of its own. + */ + if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) + data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + + return data; +} +EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); + static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { - case MSR_IA32_UCODE_REV: case MSR_IA32_ARCH_CAPABILITIES: + msr->data = kvm_get_arch_capabilities(); + break; + case MSR_IA32_UCODE_REV: rdmsrl_safe(msr->index, &msr->data); break; default: -- GitLab From 9eb0a3cce0089d93ad031c85ce6f2aee09fd4015 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Aug 2018 08:19:57 +0200 Subject: [PATCH 0748/1291] cpu/hotplug: Fix SMT supported evaluation commit bc2d8d262cba5736332cbc866acb11b1c5748aa9 upstream Josh reported that the late SMT evaluation in cpu_smt_state_init() sets cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied on the kernel command line as it cannot differentiate between SMT disabled by BIOS and SMT soft disable via 'nosmt'. That wreckages the state and makes the sysfs interface unusable. Rework this so that during bringup of the non boot CPUs the availability of SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a 'primary' thread then set the local cpu_smt_available marker and evaluate this explicitely right after the initial SMP bringup has finished. SMT evaulation on x86 is a trainwreck as the firmware has all the information _before_ booting the kernel, but there is no interface to query it. Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- include/linux/cpu.h | 2 ++ kernel/cpu.c | 41 ++++++++++++++++++++++++++------------ kernel/smp.c | 2 ++ 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 64a72b4a780a..edfc64a8a154 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,7 +62,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology(); + cpu_smt_check_topology_early(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c7712e042aba..2a378d261914 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -188,10 +188,12 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 5787d5e043da..1817d4f0c168 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -351,6 +351,8 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable); enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); +static bool cpu_smt_available __read_mostly; + void __init cpu_smt_disable(bool force) { if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || @@ -367,14 +369,28 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code. + * CPU identification. Called from architecture code before non boot CPUs + * are brought up. */ -void __init cpu_smt_check_topology(void) +void __init cpu_smt_check_topology_early(void) { if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } +/* + * If SMT was disabled by BIOS, detect it here, after the CPUs have been + * brought online. This ensures the smt/l1tf sysfs entries are consistent + * with reality. cpu_smt_available is set to true during the bringup of non + * boot CPUs when a SMT sibling is detected. Note, this may overwrite + * cpu_smt_control's previous setting. + */ +void __init cpu_smt_check_topology(void) +{ + if (!cpu_smt_available) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; +} + static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); @@ -384,10 +400,18 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; - if (topology_is_primary_thread(cpu)) + /* + * If the CPU is not a 'primary' thread and the booted_once bit is + * set then the processor has SMT support. Store this information + * for the late check of SMT support in cpu_smt_check_topology(). + */ + if (per_cpu(cpuhp_state, cpu).booted_once) + cpu_smt_available = true; + + if (cpu_smt_control == CPU_SMT_ENABLED) return true; /* @@ -2137,15 +2161,6 @@ static const struct attribute_group cpuhp_smt_attr_group = { static int __init cpu_smt_state_init(void) { - /* - * If SMT was disabled by BIOS, detect it here, after the CPUs have - * been brought online. This ensures the smt/l1tf sysfs entries are - * consistent with reality. Note this may overwrite cpu_smt_control's - * previous setting. - */ - if (topology_max_smt_threads() == 1) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; - return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } diff --git a/kernel/smp.c b/kernel/smp.c index c94dd85c8d41..2d1da290f144 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,6 +584,8 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); + /* Final decision about SMT support */ + cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } -- GitLab From a5f284feeb2071f4a381fccf1c8c9b3e05b7a465 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:36 -0700 Subject: [PATCH 0749/1291] x86/speculation/l1tf: Invert all not present mappings commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream For kernel mappings PAGE_PROTNONE is not necessarily set for a non present mapping, but the inversion logic explicitely checks for !PRESENT and PROT_NONE. Remove the PROT_NONE check and make the inversion unconditional for all not present mappings. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-invert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index 177564187fc0..44b1203ece12 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -6,7 +6,7 @@ static inline bool __pte_needs_invert(u64 val) { - return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE; + return !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ -- GitLab From d4c1ad0615ff7fd078870791446745442f969798 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:37 -0700 Subject: [PATCH 0750/1291] x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream Some cases in THP like: - MADV_FREE - mprotect - split mark the PMD non present for temporarily to prevent races. The window for an L1TF attack in these contexts is very small, but it wants to be fixed for correctness sake. Use the proper low level functions for pmd/pud_mknotpresent() to address this. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2064571dc9be..6a4b1a54ff47 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -410,11 +410,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_RW); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE); -} - static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); @@ -469,11 +464,6 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud_set_flags(pud, _PAGE_RW); } -static inline pud_t pud_mknotpresent(pud_t pud) -{ - return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE); -} - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { @@ -560,6 +550,18 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | massage_pgprot(pgprot)); } +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return pfn_pmd(pmd_pfn(pmd), + __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); +} + +static inline pud_t pud_mknotpresent(pud_t pud) +{ + return pfn_pud(pud_pfn(pud), + __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); +} + static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -- GitLab From ce1eed46b7b7b8e2c8dc515dbb07968241a0208e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:39 -0700 Subject: [PATCH 0751/1291] x86/mm/pat: Make set_memory_np() L1TF safe commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream set_memory_np() is used to mark kernel mappings not present, but it has it's own open coded mechanism which does not have the L1TF protection of inverting the address bits. Replace the open coded PTE manipulation with the L1TF protecting low level PTE routines. Passes the CPA self test. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pageattr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4085897fef64..464f53da3a6f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1006,8 +1006,8 @@ static long populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | - massage_pgprot(pmd_pgprot))); + set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, + canon_pgprot(pmd_pgprot)))); start += PMD_SIZE; cpa->pfn += PMD_SIZE >> PAGE_SHIFT; @@ -1079,8 +1079,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | - massage_pgprot(pud_pgprot))); + set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, + canon_pgprot(pud_pgprot)))); start += PUD_SIZE; cpa->pfn += PUD_SIZE >> PAGE_SHIFT; -- GitLab From 06f412c63545b1dec8451f6eda007a2d40c08e58 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:38 -0700 Subject: [PATCH 0752/1291] x86/mm/kmmio: Make the tracer robust against L1TF commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream The mmio tracer sets io mapping PTEs and PMDs to non present when enabled without inverting the address bits, which makes the PTE entry vulnerable for L1TF. Make it use the right low level macros to actually invert the address bits to protect against L1TF. In principle this could be avoided because MMIO tracing is not likely to be enabled on production machines, but the fix is straigt forward and for consistency sake it's better to get rid of the open coded PTE manipulation. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kmmio.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 7c8686709636..79eb55ce69a9 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -126,24 +126,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) { + pmd_t new_pmd; pmdval_t v = pmd_val(*pmd); if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pmd(pmd, __pmd(v)); + *old = v; + new_pmd = pmd_mknotpresent(*pmd); + } else { + /* Presume this has been called with clear==true previously */ + new_pmd = __pmd(*old); + } + set_pmd(pmd, new_pmd); } static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) { pteval_t v = pte_val(*pte); if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pte_atomic(pte, __pte(v)); + *old = v; + /* Nothing should care about address */ + pte_clear(&init_mm, 0, pte); + } else { + /* Presume this has been called with clear==true previously */ + set_pte_atomic(pte, __pte(*old)); + } } static int clear_page_presence(struct kmmio_fault_page *f, bool clear) -- GitLab From 36f50a5cd27601ae304830f1dec250b4988002ce Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Aug 2018 11:00:16 +0100 Subject: [PATCH 0753/1291] tools headers: Synchronise x86 cpufeatures.h for L1TF additions commit e24f14b0ff985f3e09e573ba1134bfdf42987e05 upstream Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 403e97d5e243..8418462298e7 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -219,6 +219,7 @@ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -338,6 +339,7 @@ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ @@ -370,5 +372,6 @@ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #endif /* _ASM_X86_CPUFEATURES_H */ -- GitLab From e456004eb77734e274e520c83ad9be76736e622c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 10 Aug 2018 08:31:10 +0100 Subject: [PATCH 0754/1291] x86/microcode: Allow late microcode loading with SMT disabled commit 07d981ad4cf1e78361c6db1c28ee5ba105f96cc1 upstream The kernel unnecessarily prevents late microcode loading when SMT is disabled. It should be safe to allow it if all the primary threads are online. Signed-off-by: Josh Poimboeuf Acked-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 4fc0e08a30b9..387a8f44fba1 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,12 +509,20 @@ static struct platform_device *microcode_pdev; static int check_online_cpus(void) { - if (num_online_cpus() == num_present_cpus()) - return 0; + unsigned int cpu; - pr_err("Not all CPUs online, aborting microcode update.\n"); + /* + * Make sure all CPUs are online. It's fine for SMT to be disabled if + * all the primary threads are still online. + */ + for_each_present_cpu(cpu) { + if (topology_is_primary_thread(cpu) && !cpu_online(cpu)) { + pr_err("Not all CPUs online, aborting microcode update.\n"); + return -EINVAL; + } + } - return -EINVAL; + return 0; } static atomic_t late_cpus_in; -- GitLab From 9428a6a0ea7bd965fd61b81e83811214654b78e2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 14 Aug 2018 23:38:57 +0200 Subject: [PATCH 0755/1291] x86/smp: fix non-SMP broken build due to redefinition of apic_id_is_primary_thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d0055f351e647f33f3b0329bff022213bf8aa085 upstream. The function has an inline "return false;" definition with CONFIG_SMP=n but the "real" definition is also visible leading to "redefinition of ‘apic_id_is_primary_thread’" compiler error. Guard it with #ifdef CONFIG_SMP Signed-off-by: Vlastimil Babka Fixes: 6a4d2657e048 ("x86/smp: Provide topology_is_primary_thread()") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 348fb896b5b4..2e64178f284d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2094,6 +2094,7 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +#ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread * @id: APIC ID to check @@ -2108,6 +2109,7 @@ bool apic_id_is_primary_thread(unsigned int apicid) mask = (1U << (fls(smp_num_siblings) - 1)) - 1; return !(apicid & mask); } +#endif /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids -- GitLab From 7bdbaba8e99782ace50e585549a54af3d079f80d Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 15 Aug 2018 00:26:00 +0300 Subject: [PATCH 0756/1291] cpu/hotplug: Non-SMP machines do not make use of booted_once commit 269777aa530f3438ec1781586cdac0b5fe47b061 upstream. Commit 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once") breaks non-SMP builds. [ I suspect the 'bool' fields should just be made to be bitfields and be exposed regardless of configuration, but that's a separate cleanup that I'll leave to the owners of this file for later. - Linus ] Fixes: 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once") Cc: Dave Hansen Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Abel Vesa Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1817d4f0c168..8f02f9b6e046 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2279,6 +2279,8 @@ void __init boot_cpu_init(void) */ void __init boot_cpu_hotplug_init(void) { +#ifdef CONFIG_SMP this_cpu_write(cpuhp_state.booted_once, true); +#endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } -- GitLab From 7cefa38bfc5829dfdcc4ebf92f5db9e7e783f97b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 14 Aug 2018 20:50:47 +0200 Subject: [PATCH 0757/1291] x86/init: fix build with CONFIG_SWAP=n commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream. The introduction of generic_max_swapfile_size and arch-specific versions has broken linking on x86 with CONFIG_SWAP=n due to undefined reference to 'generic_max_swapfile_size'. Fix it by compiling the x86-specific max_swapfile_size() only with CONFIG_SWAP=y. Reported-by: Tomas Pruzina Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Vlastimil Babka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1d55a3dfd741..37f60dfd7e4e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -883,6 +883,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) __pte2cachemode_tbl[entry] = cache; } +#ifdef CONFIG_SWAP unsigned long max_swapfile_size(void) { unsigned long pages; @@ -903,3 +904,4 @@ unsigned long max_swapfile_size(void) } return pages; } +#endif -- GitLab From b37de2cf27a7bc9ad51a72670d902ce71fc9da1e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 14 Jul 2018 21:56:13 +0200 Subject: [PATCH 0758/1291] x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED architectures commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream. pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the !__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g. ia64) and breaks build: include/asm-generic/pgtable.h: Assembler messages: include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)' include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true' include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)' include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false' arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle Move those two static inlines into the !__ASSEMBLY__ section so that they don't confuse the asm build pass. Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f50d253f213f..46a2f5d9aa25 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1055,6 +1055,18 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, static inline void init_espfix_bsp(void) { } #endif +#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED +static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) +{ + return true; +} + +static inline bool arch_has_pfn_modify_check(void) +{ + return false; +} +#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */ + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range @@ -1069,16 +1081,4 @@ static inline void init_espfix_bsp(void) { } #endif #endif -#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED -static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) -{ - return true; -} - -static inline bool arch_has_pfn_modify_check(void) -{ - return false; -} -#endif - #endif /* _ASM_GENERIC_PGTABLE_H */ -- GitLab From dc589e579782c75237018d60ab0ea05d420dffad Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Apr 2018 16:34:34 -0500 Subject: [PATCH 0759/1291] x86/CPU/AMD: Have smp_num_siblings and cpu_llc_id always be present commit f8b64d08dde2714c62751d18ba77f4aeceb161d3 upstream. Move smp_num_siblings and cpu_llc_id to cpu/common.c so that they're always present as symbols and not only in the CONFIG_SMP case. Then, other code using them doesn't need ugly ifdeffery anymore. Get rid of some ifdeffery. Signed-off-by: Borislav Petkov Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/1524864877-111962-2-git-send-email-suravee.suthikulpanit@amd.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/cpu/amd.c | 10 +--------- arch/x86/kernel/cpu/common.c | 7 +++++++ arch/x86/kernel/smpboot.c | 7 ------- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d27708..fe2ee61880a8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -170,7 +170,6 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } -#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bb013735510a..dda741bd5789 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -298,7 +298,6 @@ static int nearby_node(int apicid) } #endif -#ifdef CONFIG_SMP /* * Fix up cpu_core_id for pre-F17h systems to be in the * [0 .. cores_per_node - 1] range. Not really needed but @@ -382,7 +381,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) legacy_fixup_core_id(c); } } -#endif /* * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. @@ -390,7 +388,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) */ static void amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); @@ -402,16 +399,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; amd_get_topology(c); -#endif } u16 amd_get_nb_id(int cpu) { - u16 id = 0; -#ifdef CONFIG_SMP - id = per_cpu(cpu_llc_id, cpu); -#endif - return id; + return per_cpu(cpu_llc_id, cpu); } EXPORT_SYMBOL_GPL(amd_get_nb_id); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c925498224d0..dd02ee4fa8cd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); + +/* Last level cache ID of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6fd12e0a07c9..5ebb0dbcf4f7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -80,13 +80,6 @@ #include #include -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; - /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); -- GitLab From 9d5cd9f2a4d7eb445963cafd9872186a9a262077 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 15 Aug 2018 18:13:02 +0200 Subject: [PATCH 0760/1291] Linux 4.14.63 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d407ecfdee0b..f3bb9428b3dc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 62 +SUBLEVEL = 63 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 5f7a645432d6a8ad4164a97337af323f0b345fe3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 15 Aug 2018 08:38:33 -0700 Subject: [PATCH 0761/1291] x86/l1tf: Fix build error seen if CONFIG_KVM_INTEL is disabled commit 1eb46908b35dfbac0ec1848d4b1e39667e0187e9 upstream. allmodconfig+CONFIG_INTEL_KVM=n results in the following build error. ERROR: "l1tf_vmx_mitigation" [arch/x86/kvm/kvm.ko] undefined! Fixes: 5b76a3cff011 ("KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry") Reported-by: Meelis Roos Cc: Meelis Roos Cc: Paolo Bonzini Cc: Thomas Gleixner Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index edfc64a8a154..d07addb99b71 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -648,10 +648,9 @@ void x86_spec_ctrl_setup_ap(void) enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH; #if IS_ENABLED(CONFIG_KVM_INTEL) EXPORT_SYMBOL_GPL(l1tf_mitigation); - +#endif enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); -#endif static void __init l1tf_select_mitigation(void) { -- GitLab From 7985292f97b4fb572d03353ca151fca683796d5d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 15 Aug 2018 13:22:27 -0700 Subject: [PATCH 0762/1291] x86: i8259: Add missing include file commit 0a957467c5fd46142bc9c52758ffc552d4c5e2f7 upstream. i8259.h uses inb/outb and thus needs to include asm/io.h to avoid the following build error, as seen with x86_64:defconfig and CONFIG_SMP=n. In file included from drivers/rtc/rtc-cmos.c:45:0: arch/x86/include/asm/i8259.h: In function 'inb_pic': arch/x86/include/asm/i8259.h:32:24: error: implicit declaration of function 'inb' arch/x86/include/asm/i8259.h: In function 'outb_pic': arch/x86/include/asm/i8259.h:45:2: error: implicit declaration of function 'outb' Reported-by: Sebastian Gottschall Suggested-by: Sebastian Gottschall Fixes: 447ae3166702 ("x86: Don't include linux/irq.h from asm/hardirq.h") Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/i8259.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 5cdcdbd4d892..89789e8c80f6 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -3,6 +3,7 @@ #define _ASM_X86_I8259_H #include +#include extern unsigned int cached_irq_mask; -- GitLab From 3d0170b8af8dd66f67989e71469cb905b2a20bdd Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:46 -0600 Subject: [PATCH 0763/1291] x86/mm: Disable ioremap free page handling on x86-PAE commit f967db0b9ed44ec3057a28f3b28efc51df51b835 upstream. ioremap() supports pmd mappings on x86-PAE. However, kernel's pmd tables are not shared among processes on x86-PAE. Therefore, any update to sync'd pmd entries need re-syncing. Freeing a pte page also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one(). Disable free page handling on x86-PAE. pud_free_pmd_page() and pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present. This assures that ioremap() does not update sync'd pmd entries at the cost of falling back to pte mappings. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Reported-by: Joerg Roedel Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-2-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pgtable.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c03c85e4fb6a..debb7b7650b2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -712,6 +712,7 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } +#ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. @@ -759,4 +760,22 @@ int pmd_free_pte_page(pmd_t *pmd) return 1; } + +#else /* !CONFIG_X86_64 */ + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +/* + * Disable free page handling on x86-PAE. This assures that ioremap() + * does not update sync'd pmd entries. See vmalloc_sync_one(). + */ +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} + +#endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -- GitLab From 50123ab8c47d6e747ca333b19ac11e2b86e8dc2a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 1 Jul 2018 19:46:06 -0700 Subject: [PATCH 0764/1291] kbuild: verify that $DEPMOD is installed commit 934193a654c1f4d0643ddbf4b2529b508cae926e upstream. Verify that 'depmod' ($DEPMOD) is installed. This is a partial revert of commit 620c231c7a7f ("kbuild: do not check for ancient modutils tools"). Also update Documentation/process/changes.rst to refer to kmod instead of module-init-tools. Fixes kernel bugzilla #198965: https://bugzilla.kernel.org/show_bug.cgi?id=198965 Signed-off-by: Randy Dunlap Cc: Lucas De Marchi Cc: Lucas De Marchi Cc: Michal Marek Cc: Jessica Yu Cc: Chih-Wei Huang Cc: stable@vger.kernel.org # any kernel since 2012 Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- Documentation/process/changes.rst | 19 +++++++------------ scripts/depmod.sh | 8 +++++++- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 560beaef5a7c..73fcdcd52b87 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -33,7 +33,7 @@ GNU C 3.2 gcc --version GNU make 3.81 make --version binutils 2.20 ld -v util-linux 2.10o fdformat --version -module-init-tools 0.9.10 depmod -V +kmod 13 depmod -V e2fsprogs 1.41.4 e2fsck -V jfsutils 1.1.3 fsck.jfs -V reiserfsprogs 3.6.3 reiserfsck -V @@ -141,12 +141,6 @@ is not build with ``CONFIG_KALLSYMS`` and you have no way to rebuild and reproduce the Oops with that option, then you can still decode that Oops with ksymoops. -Module-Init-Tools ------------------ - -A new module loader is now in the kernel that requires ``module-init-tools`` -to use. It is backward compatible with the 2.4.x series kernels. - Mkinitrd -------- @@ -346,16 +340,17 @@ Util-linux - +Kmod +---- + +- +- + Ksymoops -------- - -Module-Init-Tools ------------------ - -- - Mkinitrd -------- diff --git a/scripts/depmod.sh b/scripts/depmod.sh index 9831cca31240..f41b0a4b575c 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -11,10 +11,16 @@ DEPMOD=$1 KERNELRELEASE=$2 SYMBOL_PREFIX=$3 -if ! test -r System.map -a -x "$DEPMOD"; then +if ! test -r System.map ; then exit 0 fi +if [ -z $(command -v $DEPMOD) ]; then + echo "'make modules_install' requires $DEPMOD. Please install it." >&2 + echo "This is probably in the kmod package." >&2 + exit 1 +fi + # older versions of depmod don't support -P # support was added in module-init-tools 3.13 if test -n "$SYMBOL_PREFIX"; then -- GitLab From e73828389b1c3a8c4ba17f0fc58604a3b8d88af1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Jun 2018 14:14:35 -0700 Subject: [PATCH 0765/1291] crypto: x86/sha256-mb - fix digest copy in sha256_mb_mgr_get_comp_job_avx2() commit af839b4e546613aed1fbd64def73956aa98631e7 upstream. There is a copy-paste error where sha256_mb_mgr_get_comp_job_avx2() copies the SHA-256 digest state from sha256_mb_mgr::args::digest to job_sha256::result_digest. Consequently, the sha256_mb algorithm sometimes calculates the wrong digest. Fix it. Reproducer using AF_ALG: #include #include #include #include #include #include static const __u8 expected[32] = "\xad\x7f\xac\xb2\x58\x6f\xc6\xe9\x66\xc0\x04\xd7\xd1\xd1\x6b\x02" "\x4f\x58\x05\xff\x7c\xb4\x7c\x7a\x85\xda\xbd\x8b\x48\x89\x2c\xa7"; int main() { int fd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "sha256_mb", }; __u8 data[4096] = { 0 }; __u8 digest[32]; int ret; int i; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); fork(); fd = accept(fd, 0, 0); do { ret = write(fd, data, 4096); assert(ret == 4096); ret = read(fd, digest, 32); assert(ret == 32); } while (memcmp(digest, expected, 32) == 0); printf("wrong digest: "); for (i = 0; i < 32; i++) printf("%02x", digest[i]); printf("\n"); } Output was: wrong digest: ad7facb2000000000000000000000000ffffffef7cb47c7a85dabd8b48892ca7 Fixes: 172b1d6b5a93 ("crypto: sha256-mb - fix ctx pointer and digest copy") Cc: # v4.8+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index 16c4ccb1f154..d2364c55bbde 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -265,7 +265,7 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - vmovd _args_digest(state , idx, 4) , %xmm0 + vmovd _args_digest+4*32(state, idx, 4), %xmm1 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 -- GitLab From ef70d14553ed93bb074ecf0c5136076f7481c9a4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 10:22:37 -0700 Subject: [PATCH 0766/1291] crypto: vmac - require a block cipher with 128-bit block size commit 73bf20ef3df262026c3470241ae4ac8196943ffa upstream. The VMAC template assumes the block cipher has a 128-bit block size, but it failed to check for that. Thus it was possible to instantiate it using a 64-bit block size cipher, e.g. "vmac(cast5)", causing uninitialized memory to be used. Add the needed check when instantiating the template. Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: # v2.6.32+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/vmac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/vmac.c b/crypto/vmac.c index df76a816cfb2..3034454a3713 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -655,6 +655,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (IS_ERR(alg)) return PTR_ERR(alg); + err = -EINVAL; + if (alg->cra_blocksize != 16) + goto out_put_alg; + inst = shash_alloc_instance("vmac", alg); err = PTR_ERR(inst); if (IS_ERR(inst)) -- GitLab From e7aefb13e61c52e025c510bc6372dc08edbdf27b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 10:22:38 -0700 Subject: [PATCH 0767/1291] crypto: vmac - separate tfm and request context commit bb29648102335586e9a66289a1d98a0cb392b6e5 upstream. syzbot reported a crash in vmac_final() when multiple threads concurrently use the same "vmac(aes)" transform through AF_ALG. The bug is pretty fundamental: the VMAC template doesn't separate per-request state from per-tfm (per-key) state like the other hash algorithms do, but rather stores it all in the tfm context. That's wrong. Also, vmac_final() incorrectly zeroes most of the state including the derived keys and cached pseudorandom pad. Therefore, only the first VMAC invocation with a given key calculates the correct digest. Fix these bugs by splitting the per-tfm state from the per-request state and using the proper init/update/final sequencing for requests. Reproducer for the crash: #include #include #include int main() { int fd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "vmac(aes)", }; char buf[256] = { 0 }; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16); fork(); fd = accept(fd, NULL, NULL); for (;;) write(fd, buf, 256); } The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds VMAC_NHBYTES, causing vmac_final() to memset() a negative length. Reported-by: syzbot+264bca3a6e8d645550d3@syzkaller.appspotmail.com Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: # v2.6.32+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/vmac.c | 408 +++++++++++++++++++----------------------- include/crypto/vmac.h | 63 ------- 2 files changed, 181 insertions(+), 290 deletions(-) delete mode 100644 include/crypto/vmac.h diff --git a/crypto/vmac.c b/crypto/vmac.c index 3034454a3713..bb2fc787d615 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -1,6 +1,10 @@ /* - * Modified to interface to the Linux kernel + * VMAC: Message Authentication Code using Universal Hashing + * + * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01 + * * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2018, Google Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,14 +20,15 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. */ -/* -------------------------------------------------------------------------- - * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. - * This implementation is herby placed in the public domain. - * The authors offers no warranty. Use at your own risk. - * Please send bug reports to the authors. - * Last modified: 17 APR 08, 1700 PDT - * ----------------------------------------------------------------------- */ +/* + * Derived from: + * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. + * This implementation is herby placed in the public domain. + * The authors offers no warranty. Use at your own risk. + * Last modified: 17 APR 08, 1700 PDT + */ +#include #include #include #include @@ -31,9 +36,35 @@ #include #include #include -#include #include +/* + * User definable settings. + */ +#define VMAC_TAG_LEN 64 +#define VMAC_KEY_SIZE 128/* Must be 128, 192 or 256 */ +#define VMAC_KEY_LEN (VMAC_KEY_SIZE/8) +#define VMAC_NHBYTES 128/* Must 2^i for any 3 < i < 13 Standard = 128*/ + +/* per-transform (per-key) context */ +struct vmac_tfm_ctx { + struct crypto_cipher *cipher; + u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; + u64 polykey[2*VMAC_TAG_LEN/64]; + u64 l3key[2*VMAC_TAG_LEN/64]; +}; + +/* per-request context */ +struct vmac_desc_ctx { + union { + u8 partial[VMAC_NHBYTES]; /* partial block */ + __le64 partial_words[VMAC_NHBYTES / 8]; + }; + unsigned int partial_size; /* size of the partial block */ + bool first_block_processed; + u64 polytmp[2*VMAC_TAG_LEN/64]; /* running total of L2-hash */ +}; + /* * Constants and masks */ @@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo, } while (0) #endif -static void vhash_abort(struct vmac_ctx *ctx) -{ - ctx->polytmp[0] = ctx->polykey[0] ; - ctx->polytmp[1] = ctx->polykey[1] ; - ctx->first_block_processed = 0; -} - static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) { u64 rh, rl, t, z = 0; @@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) return rl; } -static void vhash_update(const unsigned char *m, - unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ - struct vmac_ctx *ctx) +/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */ +static void vhash_blocks(const struct vmac_tfm_ctx *tctx, + struct vmac_desc_ctx *dctx, + const __le64 *mptr, unsigned int blocks) { - u64 rh, rl, *mptr; - const u64 *kptr = (u64 *)ctx->nhkey; - int i; - u64 ch, cl; - u64 pkh = ctx->polykey[0]; - u64 pkl = ctx->polykey[1]; - - if (!mbytes) - return; - - BUG_ON(mbytes % VMAC_NHBYTES); - - mptr = (u64 *)m; - i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ - - ch = ctx->polytmp[0]; - cl = ctx->polytmp[1]; - - if (!ctx->first_block_processed) { - ctx->first_block_processed = 1; + const u64 *kptr = tctx->nhkey; + const u64 pkh = tctx->polykey[0]; + const u64 pkl = tctx->polykey[1]; + u64 ch = dctx->polytmp[0]; + u64 cl = dctx->polytmp[1]; + u64 rh, rl; + + if (!dctx->first_block_processed) { + dctx->first_block_processed = true; nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); rh &= m62; ADD128(ch, cl, rh, rl); mptr += (VMAC_NHBYTES/sizeof(u64)); - i--; + blocks--; } - while (i--) { + while (blocks--) { nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); rh &= m62; poly_step(ch, cl, pkh, pkl, rh, rl); mptr += (VMAC_NHBYTES/sizeof(u64)); } - ctx->polytmp[0] = ch; - ctx->polytmp[1] = cl; + dctx->polytmp[0] = ch; + dctx->polytmp[1] = cl; } -static u64 vhash(unsigned char m[], unsigned int mbytes, - u64 *tagl, struct vmac_ctx *ctx) +static int vmac_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) { - u64 rh, rl, *mptr; - const u64 *kptr = (u64 *)ctx->nhkey; - int i, remaining; - u64 ch, cl; - u64 pkh = ctx->polykey[0]; - u64 pkl = ctx->polykey[1]; - - mptr = (u64 *)m; - i = mbytes / VMAC_NHBYTES; - remaining = mbytes % VMAC_NHBYTES; - - if (ctx->first_block_processed) { - ch = ctx->polytmp[0]; - cl = ctx->polytmp[1]; - } else if (i) { - nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl); - ch &= m62; - ADD128(ch, cl, pkh, pkl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - i--; - } else if (remaining) { - nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl); - ch &= m62; - ADD128(ch, cl, pkh, pkl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - goto do_l3; - } else {/* Empty String */ - ch = pkh; cl = pkl; - goto do_l3; - } - - while (i--) { - nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); - rh &= m62; - poly_step(ch, cl, pkh, pkl, rh, rl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - } - if (remaining) { - nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl); - rh &= m62; - poly_step(ch, cl, pkh, pkl, rh, rl); - } - -do_l3: - vhash_abort(ctx); - remaining *= 8; - return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining); -} + struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm); + __be64 out[2]; + u8 in[16] = { 0 }; + unsigned int i; + int err; -static u64 vmac(unsigned char m[], unsigned int mbytes, - const unsigned char n[16], u64 *tagl, - struct vmac_ctx_t *ctx) -{ - u64 *in_n, *out_p; - u64 p, h; - int i; - - in_n = ctx->__vmac_ctx.cached_nonce; - out_p = ctx->__vmac_ctx.cached_aes; - - i = n[15] & 1; - if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) { - in_n[0] = *(u64 *)(n); - in_n[1] = *(u64 *)(n+8); - ((unsigned char *)in_n)[15] &= 0xFE; - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out_p, (unsigned char *)in_n); - - ((unsigned char *)in_n)[15] |= (unsigned char)(1-i); + if (keylen != VMAC_KEY_LEN) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; } - p = be64_to_cpup(out_p + i); - h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); - return le64_to_cpu(p + h); -} -static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) -{ - u64 in[2] = {0}, out[2]; - unsigned i; - int err = 0; - - err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN); + err = crypto_cipher_setkey(tctx->cipher, key, keylen); if (err) return err; /* Fill nh key */ - ((unsigned char *)in)[0] = 0x80; - for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out); - ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1); - ((unsigned char *)in)[15] += 1; + in[0] = 0x80; + for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) { + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->nhkey[i] = be64_to_cpu(out[0]); + tctx->nhkey[i+1] = be64_to_cpu(out[1]); + in[15]++; } /* Fill poly key */ - ((unsigned char *)in)[0] = 0xC0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.polytmp[i] = - ctx->__vmac_ctx.polykey[i] = - be64_to_cpup(out) & mpoly; - ctx->__vmac_ctx.polytmp[i+1] = - ctx->__vmac_ctx.polykey[i+1] = - be64_to_cpup(out+1) & mpoly; - ((unsigned char *)in)[15] += 1; + in[0] = 0xC0; + in[15] = 0; + for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) { + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly; + tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly; + in[15]++; } /* Fill ip key */ - ((unsigned char *)in)[0] = 0xE0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) { + in[0] = 0xE0; + in[15] = 0; + for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) { do { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out); - ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1); - ((unsigned char *)in)[15] += 1; - } while (ctx->__vmac_ctx.l3key[i] >= p64 - || ctx->__vmac_ctx.l3key[i+1] >= p64); + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->l3key[i] = be64_to_cpu(out[0]); + tctx->l3key[i+1] = be64_to_cpu(out[1]); + in[15]++; + } while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64); } - /* Invalidate nonce/aes cache and reset other elements */ - ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */ - ctx->__vmac_ctx.cached_nonce[1] = (u64)0; /* Ensure illegal nonce */ - ctx->__vmac_ctx.first_block_processed = 0; - - return err; + return 0; } -static int vmac_setkey(struct crypto_shash *parent, - const u8 *key, unsigned int keylen) +static int vmac_init(struct shash_desc *desc) { - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - - if (keylen != VMAC_KEY_LEN) { - crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return vmac_set_key((u8 *)key, ctx); -} + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); -static int vmac_init(struct shash_desc *pdesc) -{ + dctx->partial_size = 0; + dctx->first_block_processed = false; + memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp)); return 0; } -static int vmac_update(struct shash_desc *pdesc, const u8 *p, - unsigned int len) +static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len) { - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - int expand; - int min; - - expand = VMAC_NHBYTES - ctx->partial_size > 0 ? - VMAC_NHBYTES - ctx->partial_size : 0; - - min = len < expand ? len : expand; - - memcpy(ctx->partial + ctx->partial_size, p, min); - ctx->partial_size += min; - - if (len < expand) - return 0; - - vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx); - ctx->partial_size = 0; - - len -= expand; - p += expand; + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); + unsigned int n; + + if (dctx->partial_size) { + n = min(len, VMAC_NHBYTES - dctx->partial_size); + memcpy(&dctx->partial[dctx->partial_size], p, n); + dctx->partial_size += n; + p += n; + len -= n; + if (dctx->partial_size == VMAC_NHBYTES) { + vhash_blocks(tctx, dctx, dctx->partial_words, 1); + dctx->partial_size = 0; + } + } - if (len % VMAC_NHBYTES) { - memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES), - len % VMAC_NHBYTES); - ctx->partial_size = len % VMAC_NHBYTES; + if (len >= VMAC_NHBYTES) { + n = round_down(len, VMAC_NHBYTES); + /* TODO: 'p' may be misaligned here */ + vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES); + p += n; + len -= n; } - vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx); + if (len) { + memcpy(dctx->partial, p, len); + dctx->partial_size = len; + } return 0; } -static int vmac_final(struct shash_desc *pdesc, u8 *out) +static u64 vhash_final(const struct vmac_tfm_ctx *tctx, + struct vmac_desc_ctx *dctx) { - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - vmac_t mac; - u8 nonce[16] = {}; - - /* vmac() ends up accessing outside the array bounds that - * we specify. In appears to access up to the next 2-word - * boundary. We'll just be uber cautious and zero the - * unwritten bytes in the buffer. - */ - if (ctx->partial_size) { - memset(ctx->partial + ctx->partial_size, 0, - VMAC_NHBYTES - ctx->partial_size); + unsigned int partial = dctx->partial_size; + u64 ch = dctx->polytmp[0]; + u64 cl = dctx->polytmp[1]; + + /* L1 and L2-hash the final block if needed */ + if (partial) { + /* Zero-pad to next 128-bit boundary */ + unsigned int n = round_up(partial, 16); + u64 rh, rl; + + memset(&dctx->partial[partial], 0, n - partial); + nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl); + rh &= m62; + if (dctx->first_block_processed) + poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1], + rh, rl); + else + ADD128(ch, cl, rh, rl); } - mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx); - memcpy(out, &mac, sizeof(vmac_t)); - memzero_explicit(&mac, sizeof(vmac_t)); - memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); - ctx->partial_size = 0; + + /* L3-hash the 128-bit output of L2-hash */ + return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8); +} + +static int vmac_final(struct shash_desc *desc, u8 *out) +{ + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); + static const u8 nonce[16] = {}; /* TODO: this is insecure */ + union { + u8 bytes[16]; + __be64 pads[2]; + } block; + int index; + u64 hash, pad; + + /* Finish calculating the VHASH of the message */ + hash = vhash_final(tctx, dctx); + + /* Generate pseudorandom pad by encrypting the nonce */ + memcpy(&block, nonce, 16); + index = block.bytes[15] & 1; + block.bytes[15] &= ~1; + crypto_cipher_encrypt_one(tctx->cipher, block.bytes, block.bytes); + pad = be64_to_cpu(block.pads[index]); + + /* The VMAC is the sum of VHASH and the pseudorandom pad */ + put_unaligned_le64(hash + pad, out); return 0; } static int vmac_init_tfm(struct crypto_tfm *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); + struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm); + struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctx->child = cipher; + tctx->cipher = cipher; return 0; } static void vmac_exit_tfm(struct crypto_tfm *tfm) { - struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); - crypto_free_cipher(ctx->child); + struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_cipher(tctx->cipher); } static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) @@ -674,11 +627,12 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - inst->alg.digestsize = sizeof(vmac_t); - inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t); + inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx); inst->alg.base.cra_init = vmac_init_tfm; inst->alg.base.cra_exit = vmac_exit_tfm; + inst->alg.descsize = sizeof(struct vmac_desc_ctx); + inst->alg.digestsize = VMAC_TAG_LEN / 8; inst->alg.init = vmac_init; inst->alg.update = vmac_update; inst->alg.final = vmac_final; diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h deleted file mode 100644 index 6b700c7b2fe1..000000000000 --- a/include/crypto/vmac.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Modified to interface to the Linux kernel - * Copyright (c) 2009, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#ifndef __CRYPTO_VMAC_H -#define __CRYPTO_VMAC_H - -/* -------------------------------------------------------------------------- - * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. - * This implementation is herby placed in the public domain. - * The authors offers no warranty. Use at your own risk. - * Please send bug reports to the authors. - * Last modified: 17 APR 08, 1700 PDT - * ----------------------------------------------------------------------- */ - -/* - * User definable settings. - */ -#define VMAC_TAG_LEN 64 -#define VMAC_KEY_SIZE 128/* Must be 128, 192 or 256 */ -#define VMAC_KEY_LEN (VMAC_KEY_SIZE/8) -#define VMAC_NHBYTES 128/* Must 2^i for any 3 < i < 13 Standard = 128*/ - -/* - * This implementation uses u32 and u64 as names for unsigned 32- - * and 64-bit integer types. These are defined in C99 stdint.h. The - * following may need adaptation if you are not running a C99 or - * Microsoft C environment. - */ -struct vmac_ctx { - u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; - u64 polykey[2*VMAC_TAG_LEN/64]; - u64 l3key[2*VMAC_TAG_LEN/64]; - u64 polytmp[2*VMAC_TAG_LEN/64]; - u64 cached_nonce[2]; - u64 cached_aes[2]; - int first_block_processed; -}; - -typedef u64 vmac_t; - -struct vmac_ctx_t { - struct crypto_cipher *child; - struct vmac_ctx __vmac_ctx; - u8 partial[VMAC_NHBYTES]; /* partial block */ - int partial_size; /* size of the partial block */ -}; - -#endif /* __CRYPTO_VMAC_H */ -- GitLab From 2cde72d94f0a3a12681e9321a7b9e15b524bf15f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 10:54:57 -0700 Subject: [PATCH 0768/1291] crypto: blkcipher - fix crash flushing dcache in error path commit 0868def3e4100591e7a1fdbf3eed1439cc8f7ca3 upstream. Like the skcipher_walk case: scatterwalk_done() is only meant to be called after a nonzero number of bytes have been processed, since scatterwalk_pagedone() will flush the dcache of the *previous* page. But in the error case of blkcipher_walk_done(), e.g. if the input wasn't an integer number of blocks, scatterwalk_done() was actually called after advancing 0 bytes. This caused a crash ("BUG: unable to handle kernel paging request") during '!PageSlab(page)' on architectures like arm and arm64 that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was page-aligned as in that case walk->offset == 0. Fix it by reorganizing blkcipher_walk_done() to skip the scatterwalk_advance() and scatterwalk_done() if an error has occurred. This bug was found by syzkaller fuzzing. Reproducer, assuming ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE: #include #include #include int main() { struct sockaddr_alg addr = { .salg_type = "skcipher", .salg_name = "ecb(aes-generic)", }; char buffer[4096] __attribute__((aligned(4096))) = { 0 }; int fd; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buffer, 16); fd = accept(fd, NULL, NULL); write(fd, buffer, 15); read(fd, buffer, 15); } Reported-by: Liu Chao Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type") Cc: # v2.6.19+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/blkcipher.c | 54 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 6c43a0a17a55..d84c6920ada9 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -71,19 +71,18 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk, - unsigned int bsize) +static inline void blkcipher_done_slow(struct blkcipher_walk *walk, + unsigned int bsize) { u8 *addr; addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = blkcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, 1); - return bsize; } -static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, - unsigned int n) +static inline void blkcipher_done_fast(struct blkcipher_walk *walk, + unsigned int n) { if (walk->flags & BLKCIPHER_WALK_COPY) { blkcipher_map_dst(walk); @@ -97,49 +96,48 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); - - return n; } int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err) { - unsigned int nbytes = 0; + unsigned int n; /* bytes processed */ + bool more; - if (likely(err >= 0)) { - unsigned int n = walk->nbytes - err; + if (unlikely(err < 0)) + goto finish; - if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) - n = blkcipher_done_fast(walk, n); - else if (WARN_ON(err)) { - err = -EINVAL; - goto err; - } else - n = blkcipher_done_slow(walk, n); + n = walk->nbytes - err; + walk->total -= n; + more = (walk->total != 0); - nbytes = walk->total - n; - err = 0; + if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) { + blkcipher_done_fast(walk, n); + } else { + if (WARN_ON(err)) { + /* unexpected case; didn't process all bytes */ + err = -EINVAL; + goto finish; + } + blkcipher_done_slow(walk, n); } - scatterwalk_done(&walk->in, 0, nbytes); - scatterwalk_done(&walk->out, 1, nbytes); + scatterwalk_done(&walk->in, 0, more); + scatterwalk_done(&walk->out, 1, more); -err: - walk->total = nbytes; - walk->nbytes = nbytes; - - if (nbytes) { + if (more) { crypto_yield(desc->flags); return blkcipher_walk_next(desc, walk); } - + err = 0; +finish: + walk->nbytes = 0; if (walk->iv != desc->info) memcpy(desc->info, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) free_page((unsigned long)walk->page); - return err; } EXPORT_SYMBOL_GPL(blkcipher_walk_done); -- GitLab From 68432fd1665b6c0647cf2cce6c218fea4761a5b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 10:54:58 -0700 Subject: [PATCH 0769/1291] crypto: ablkcipher - fix crash flushing dcache in error path commit 318abdfbe708aaaa652c79fb500e9bd60521f9dc upstream. Like the skcipher_walk and blkcipher_walk cases: scatterwalk_done() is only meant to be called after a nonzero number of bytes have been processed, since scatterwalk_pagedone() will flush the dcache of the *previous* page. But in the error case of ablkcipher_walk_done(), e.g. if the input wasn't an integer number of blocks, scatterwalk_done() was actually called after advancing 0 bytes. This caused a crash ("BUG: unable to handle kernel paging request") during '!PageSlab(page)' on architectures like arm and arm64 that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was page-aligned as in that case walk->offset == 0. Fix it by reorganizing ablkcipher_walk_done() to skip the scatterwalk_advance() and scatterwalk_done() if an error has occurred. Reported-by: Liu Chao Fixes: bf06099db18a ("crypto: skcipher - Add ablkcipher_walk interfaces") Cc: # v2.6.35+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ablkcipher.c | 57 +++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index d880a4897159..4ee7c041bb82 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -71,11 +71,9 @@ static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, - unsigned int bsize) +static inline void ablkcipher_done_slow(struct ablkcipher_walk *walk, + unsigned int n) { - unsigned int n = bsize; - for (;;) { unsigned int len_this_page = scatterwalk_pagelen(&walk->out); @@ -87,17 +85,13 @@ static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, n -= len_this_page; scatterwalk_start(&walk->out, sg_next(walk->out.sg)); } - - return bsize; } -static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk, - unsigned int n) +static inline void ablkcipher_done_fast(struct ablkcipher_walk *walk, + unsigned int n) { scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); - - return n; } static int ablkcipher_walk_next(struct ablkcipher_request *req, @@ -107,39 +101,40 @@ int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err) { struct crypto_tfm *tfm = req->base.tfm; - unsigned int nbytes = 0; + unsigned int n; /* bytes processed */ + bool more; - if (likely(err >= 0)) { - unsigned int n = walk->nbytes - err; + if (unlikely(err < 0)) + goto finish; - if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) - n = ablkcipher_done_fast(walk, n); - else if (WARN_ON(err)) { - err = -EINVAL; - goto err; - } else - n = ablkcipher_done_slow(walk, n); + n = walk->nbytes - err; + walk->total -= n; + more = (walk->total != 0); - nbytes = walk->total - n; - err = 0; + if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) { + ablkcipher_done_fast(walk, n); + } else { + if (WARN_ON(err)) { + /* unexpected case; didn't process all bytes */ + err = -EINVAL; + goto finish; + } + ablkcipher_done_slow(walk, n); } - scatterwalk_done(&walk->in, 0, nbytes); - scatterwalk_done(&walk->out, 1, nbytes); - -err: - walk->total = nbytes; - walk->nbytes = nbytes; + scatterwalk_done(&walk->in, 0, more); + scatterwalk_done(&walk->out, 1, more); - if (nbytes) { + if (more) { crypto_yield(req->base.flags); return ablkcipher_walk_next(req, walk); } - + err = 0; +finish: + walk->nbytes = 0; if (walk->iv != req->info) memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize); kfree(walk->iv_buffer); - return err; } EXPORT_SYMBOL_GPL(ablkcipher_walk_done); -- GitLab From 0f2981ee03ff3f9678a43592065d0ae95de451dc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 09:57:50 -0700 Subject: [PATCH 0770/1291] crypto: skcipher - fix aligning block size in skcipher_copy_iv() commit 0567fc9e90b9b1c8dbce8a5468758e6206744d4a upstream. The ALIGN() macro needs to be passed the alignment, not the alignmask (which is the alignment minus 1). Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") Cc: # v4.10+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/skcipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 11af5fd6a443..5900c8a43dea 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -399,7 +399,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) unsigned size; u8 *iv; - aligned_bs = ALIGN(bs, alignmask); + aligned_bs = ALIGN(bs, alignmask + 1); /* Minimum size to align buffer by alignmask. */ size = alignmask & ~a; -- GitLab From 7e179bffb6811ecfd653cc651d44bae55941bc97 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 10:54:56 -0700 Subject: [PATCH 0771/1291] crypto: skcipher - fix crash flushing dcache in error path commit 8088d3dd4d7c6933a65aa169393b5d88d8065672 upstream. scatterwalk_done() is only meant to be called after a nonzero number of bytes have been processed, since scatterwalk_pagedone() will flush the dcache of the *previous* page. But in the error case of skcipher_walk_done(), e.g. if the input wasn't an integer number of blocks, scatterwalk_done() was actually called after advancing 0 bytes. This caused a crash ("BUG: unable to handle kernel paging request") during '!PageSlab(page)' on architectures like arm and arm64 that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was page-aligned as in that case walk->offset == 0. Fix it by reorganizing skcipher_walk_done() to skip the scatterwalk_advance() and scatterwalk_done() if an error has occurred. This bug was found by syzkaller fuzzing. Reproducer, assuming ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE: #include #include #include int main() { struct sockaddr_alg addr = { .salg_type = "skcipher", .salg_name = "cbc(aes-generic)", }; char buffer[4096] __attribute__((aligned(4096))) = { 0 }; int fd; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buffer, 16); fd = accept(fd, NULL, NULL); write(fd, buffer, 15); read(fd, buffer, 15); } Reported-by: Liu Chao Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") Cc: # v4.10+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/skcipher.c | 49 ++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 5900c8a43dea..e319421a32e7 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -95,7 +95,7 @@ static inline u8 *skcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) +static void skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) { u8 *addr; @@ -103,23 +103,24 @@ static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) addr = skcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); - return 0; } int skcipher_walk_done(struct skcipher_walk *walk, int err) { - unsigned int n = walk->nbytes - err; - unsigned int nbytes; + unsigned int n; /* bytes processed */ + bool more; + + if (unlikely(err < 0)) + goto finish; - nbytes = walk->total - n; + n = walk->nbytes - err; + walk->total -= n; + more = (walk->total != 0); - if (unlikely(err < 0)) { - nbytes = 0; - n = 0; - } else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | - SKCIPHER_WALK_SLOW | - SKCIPHER_WALK_COPY | - SKCIPHER_WALK_DIFF)))) { + if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | + SKCIPHER_WALK_SLOW | + SKCIPHER_WALK_COPY | + SKCIPHER_WALK_DIFF)))) { unmap_src: skcipher_unmap_src(walk); } else if (walk->flags & SKCIPHER_WALK_DIFF) { @@ -131,28 +132,28 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err) skcipher_unmap_dst(walk); } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { if (WARN_ON(err)) { + /* unexpected case; didn't process all bytes */ err = -EINVAL; - nbytes = 0; - } else - n = skcipher_done_slow(walk, n); + goto finish; + } + skcipher_done_slow(walk, n); + goto already_advanced; } - if (err > 0) - err = 0; - - walk->total = nbytes; - walk->nbytes = nbytes; - scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); - scatterwalk_done(&walk->in, 0, nbytes); - scatterwalk_done(&walk->out, 1, nbytes); +already_advanced: + scatterwalk_done(&walk->in, 0, more); + scatterwalk_done(&walk->out, 1, more); - if (nbytes) { + if (more) { crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ? CRYPTO_TFM_REQ_MAY_SLEEP : 0); return skcipher_walk_next(walk); } + err = 0; +finish: + walk->nbytes = 0; /* Short-circuit for the common/fast path. */ if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) -- GitLab From e5f4bae2e3c8088807037a119daa2d91aa07908c Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 6 Nov 2017 18:44:25 +0000 Subject: [PATCH 0772/1291] ACPI / APEI: Remove ghes_ioremap_area commit 520e18a5080d2c444a03280d99c8a35cb667d321 upstream. Now that nothing is using the ghes_ioremap_area pages, rip them out. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Tested-by: Tyler Baicar Signed-off-by: Rafael J. Wysocki Cc: All applicable Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/apei/ghes.c | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 572b6c7303ed..f14695e744d0 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -114,19 +114,7 @@ static DEFINE_MUTEX(ghes_list_mutex); * from BIOS to Linux can be determined only in NMI, IRQ or timer * handler, but general ioremap can not be used in atomic context, so * the fixmap is used instead. - */ - -/* - * Two virtual pages are used, one for IRQ/PROCESS context, the other for - * NMI context (optionally). - */ -#define GHES_IOREMAP_PAGES 2 -#define GHES_IOREMAP_IRQ_PAGE(base) (base) -#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) - -/* virtual memory area for atomic ioremap */ -static struct vm_struct *ghes_ioremap_area; -/* + * * These 2 spinlocks are used to prevent the fixmap entries from being used * simultaneously. */ @@ -141,23 +129,6 @@ static atomic_t ghes_estatus_cache_alloced; static int ghes_panic_timeout __read_mostly = 30; -static int ghes_ioremap_init(void) -{ - ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, - VM_IOREMAP, VMALLOC_START, VMALLOC_END); - if (!ghes_ioremap_area) { - pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n"); - return -ENOMEM; - } - - return 0; -} - -static void ghes_ioremap_exit(void) -{ - free_vm_area(ghes_ioremap_area); -} - static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) { phys_addr_t paddr; @@ -1247,13 +1218,9 @@ static int __init ghes_init(void) ghes_nmi_init_cxt(); - rc = ghes_ioremap_init(); - if (rc) - goto err; - rc = ghes_estatus_pool_init(); if (rc) - goto err_ioremap_exit; + goto err; rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX); @@ -1277,8 +1244,6 @@ static int __init ghes_init(void) return 0; err_pool_exit: ghes_estatus_pool_exit(); -err_ioremap_exit: - ghes_ioremap_exit(); err: return rc; } -- GitLab From 13f12749af15149d0b10b30bf3472b62deded15a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Nov 2017 13:05:48 +0100 Subject: [PATCH 0773/1291] sched/debug: Fix task state recording/printout commit 3f5fe9fef5b2da06b6319fab8123056da5217c3f upstream. The recent conversion of the task state recording to use task_state_index() broke the sched_switch tracepoint task state output. task_state_index() returns surprisingly an index (0-7) which is then printed with __print_flags() applying bitmasks. Not really working and resulting in weird states like 'prev_state=t' instead of 'prev_state=I'. Use TASK_REPORT_MAX instead of TASK_STATE_MAX to report preemption. Build a bitmask from the return value of task_state_index() and store it in entry->prev_state, which makes __print_flags() work as expected. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: stable@vger.kernel.org Fixes: efb40f588b43 ("sched/tracing: Fix trace_sched_switch task-state printing") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1711221304180.1751@nanos Signed-off-by: Ingo Molnar Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index d447f24df970..0812cd5408c9 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -116,9 +116,9 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * * RUNNING (we will not have dequeued if state != RUNNING). */ if (preempt) - return TASK_STATE_MAX; + return TASK_REPORT_MAX; - return __get_task_state(p); + return 1 << __get_task_state(p); } #endif /* CREATE_TRACE_POINTS */ @@ -164,7 +164,7 @@ TRACE_EVENT(sched_switch, { 0x40, "P" }, { 0x80, "I" }) : "R", - __entry->prev_state & TASK_STATE_MAX ? "+" : "", + __entry->prev_state & TASK_REPORT_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- GitLab From de59521d508a32c6d9f3ae2bc4473658cd12df79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 20 Nov 2017 10:55:15 +0100 Subject: [PATCH 0774/1291] fw_cfg: fix driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 23f1b8d938c861ee0bbb786162f7ce0685f722ec upstream. On driver remove(), all objects created during probe() should be removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder functions to match probe() error cleanup code. Cc: stable@vger.kernel.org Signed-off-by: Marc-André Lureau Signed-off-by: Michael S. Tsirkin Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qemu_fw_cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 0e2011636fbb..c53c7ac992f8 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev) { pr_debug("fw_cfg: unloading.\n"); fw_cfg_sysfs_cache_cleanup(); + sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr); + fw_cfg_io_cleanup(); fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset); fw_cfg_kobj_cleanup(fw_cfg_sel_ko); - fw_cfg_io_cleanup(); return 0; } -- GitLab From c13fddcb98227d17cac2f4bc943ea81916839886 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 11 Sep 2017 04:17:26 +0000 Subject: [PATCH 0775/1291] ASoC: rsnd: fix ADG flags commit b7165d26bf730567ab081bb9383aff82cd43d9ea upstream. Current ADG driver is over-writing flags. This patch fixes it. Reported-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/adg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 2684a2ba33cd..e28edb1f7263 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -479,10 +479,10 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, } if (req_rate[0] % 48000 == 0) - adg->flags = AUDIO_OUT_48; + adg->flags |= AUDIO_OUT_48; if (of_get_property(np, "clkout-lr-asynchronous", NULL)) - adg->flags = LRCLK_ASYNC; + adg->flags |= LRCLK_ASYNC; /* * This driver is assuming that AUDIO_CLKA/AUDIO_CLKB/AUDIO_CLKC -- GitLab From 3f745fe61ac6c9e9783ec4c247171ac616d66c8b Mon Sep 17 00:00:00 2001 From: Alexander Syring Date: Thu, 14 Sep 2017 14:05:19 +0200 Subject: [PATCH 0776/1291] clk: sunxi-ng: Fix missing CLK_SET_RATE_PARENT in ccu-sun4i-a10.c commit a894990ac994a53bc5a0cc694eb12f3c064c18c5 upstream. When using cpufreq-dt with default govenor other than "performance" system freezes while booting. Adding CLK_SET_RATE_PARENT | CLK_IS_CRITICAL to clk_cpu fixes the problem. Tested on Cubietruck (A20). Fixes: c84f5683f6E ("clk: sunxi-ng: Add sun4i/sun7i CCU driver") Acked-by: Chen-Yu Tsai Signed-off-by: Alexander Syring Signed-off-by: Maxime Ripard Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi-ng/ccu-sun4i-a10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c index 286b0049b7b6..a48fde191c0a 100644 --- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c +++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c @@ -223,7 +223,7 @@ static struct ccu_mux cpu_clk = { .hw.init = CLK_HW_INIT_PARENTS("cpu", cpu_parents, &ccu_mux_ops, - CLK_IS_CRITICAL), + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL), } }; -- GitLab From 1a2d99218d87868841f2dd301c55fcf7e5374b30 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 21 Sep 2017 11:04:00 +0200 Subject: [PATCH 0777/1291] block, bfq: fix wrong init of saved start time for weight raising commit 4baa8bb13f41307f3eb62fe91f93a1a798ebef53 upstream. This commit fixes a bug that causes bfq to fail to guarantee a high responsiveness on some drives, if there is heavy random read+write I/O in the background. More precisely, such a failure allowed this bug to be found [1], but the bug may well cause other yet unreported anomalies. BFQ raises the weight of the bfq_queues associated with soft real-time applications, to privilege the I/O, and thus reduce latency, for these applications. This mechanism is named soft-real-time weight raising in BFQ. A soft real-time period may happen to be nested into an interactive weight raising period, i.e., it may happen that, when a bfq_queue switches to a soft real-time weight-raised state, the bfq_queue is already being weight-raised because deemed interactive too. In this case, BFQ saves in a special variable wr_start_at_switch_to_srt, the time instant when the interactive weight-raising period started for the bfq_queue, i.e., the time instant when BFQ started to deem the bfq_queue interactive. This value is then used to check whether the interactive weight-raising period would still be in progress when the soft real-time weight-raising period ends. If so, interactive weight raising is restored for the bfq_queue. This restore is useful, in particular, because it prevents bfq_queues from losing their interactive weight raising prematurely, as a consequence of spurious, short-lived soft real-time weight-raising periods caused by wrong detections as soft real-time. If, instead, a bfq_queue switches to soft-real-time weight raising while it *is not* already in an interactive weight-raising period, then the variable wr_start_at_switch_to_srt has no meaning during the following soft real-time weight-raising period. Unfortunately the handling of this case is wrong in BFQ: not only the variable is not flagged somehow as meaningless, but it is also set to the time when the switch to soft real-time weight-raising occurs. This may cause an interactive weight-raising period to be considered mistakenly as still in progress, and thus a spurious interactive weight-raising period to start for the bfq_queue, at the end of the soft-real-time weight-raising period. In particular the spurious interactive weight-raising period will be considered as still in progress, if the soft-real-time weight-raising period does not last very long. The bfq_queue will then be wrongly privileged and, if I/O bound, will unjustly steal bandwidth to truly interactive or soft real-time bfq_queues, harming responsiveness and low latency. This commit fixes this issue by just setting wr_start_at_switch_to_srt to minus infinity (farthest past time instant according to jiffies macros): when the soft-real-time weight-raising period ends, certainly no interactive weight-raising period will be considered as still in progress. [1] Background I/O Type: Random - Background I/O mix: Reads and writes - Application to start: LibreOffice Writer in http://www.phoronix.com/scan.php?page=news_item&px=Linux-4.13-IO-Laptop Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco Tested-by: Oleksandr Natalenko Tested-by: Lee Tibbert Tested-by: Mirko Montanari Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- block/bfq-iosched.c | 50 ++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4a4b7d3c909a..3b44bd28fc45 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1203,6 +1203,24 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) return dur; } +/* + * Return the farthest future time instant according to jiffies + * macros. + */ +static unsigned long bfq_greatest_from_now(void) +{ + return jiffies + MAX_JIFFY_OFFSET; +} + +/* + * Return the farthest past time instant according to jiffies + * macros. + */ +static unsigned long bfq_smallest_from_now(void) +{ + return jiffies - MAX_JIFFY_OFFSET; +} + static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned int old_wr_coeff, @@ -1217,7 +1235,19 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); } else { - bfqq->wr_start_at_switch_to_srt = jiffies; + /* + * No interactive weight raising in progress + * here: assign minus infinity to + * wr_start_at_switch_to_srt, to make sure + * that, at the end of the soft-real-time + * weight raising periods that is starting + * now, no interactive weight-raising period + * may be wrongly considered as still in + * progress (and thus actually started by + * mistake). + */ + bfqq->wr_start_at_switch_to_srt = + bfq_smallest_from_now(); bfqq->wr_coeff = bfqd->bfq_wr_coeff * BFQ_SOFTRT_WEIGHT_FACTOR; bfqq->wr_cur_max_time = @@ -2896,24 +2926,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } -/* - * Return the farthest future time instant according to jiffies - * macros. - */ -static unsigned long bfq_greatest_from_now(void) -{ - return jiffies + MAX_JIFFY_OFFSET; -} - -/* - * Return the farthest past time instant according to jiffies - * macros. - */ -static unsigned long bfq_smallest_from_now(void) -{ - return jiffies - MAX_JIFFY_OFFSET; -} - /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. -- GitLab From 643a9d6d4ff70c3e6580014299bba3e75e3dc620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Fran=C3=A7ois=20T=C3=AAtu?= Date: Fri, 22 Sep 2017 17:44:28 -0400 Subject: [PATCH 0778/1291] ASoC: msm8916-wcd-digital: fix RX2 MIX1 and RX3 MIX1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f53ee247ad546183fc13739adafc5579b9f0ebc0 upstream. The kcontrol for the third input (rxN_mix1_inp3) of both RX2 and RX3 mixers are not using the correct control register. This simple patch fixes this. Signed-off-by: Jean-François Têtu Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/msm8916-wcd-digital.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index 694db27b11fa..13354d6304a8 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -238,7 +238,7 @@ static const struct soc_enum rx_mix2_inp1_chain_enum = SOC_ENUM_SINGLE( static const struct soc_enum rx2_mix1_inp_enum[] = { SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B1_CTL, 0, 6, rx_mix1_text), SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B1_CTL, 3, 6, rx_mix1_text), - SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B1_CTL, 0, 6, rx_mix1_text), + SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B2_CTL, 0, 6, rx_mix1_text), }; /* RX2 MIX2 */ @@ -249,7 +249,7 @@ static const struct soc_enum rx2_mix2_inp1_chain_enum = SOC_ENUM_SINGLE( static const struct soc_enum rx3_mix1_inp_enum[] = { SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B1_CTL, 0, 6, rx_mix1_text), SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B1_CTL, 3, 6, rx_mix1_text), - SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B1_CTL, 0, 6, rx_mix1_text), + SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B2_CTL, 0, 6, rx_mix1_text), }; /* DEC */ -- GitLab From 9efed51870be4cca7725da7bcebf4cffb4b8f1a9 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Sep 2017 00:13:08 -0500 Subject: [PATCH 0779/1291] ASoC: Intel: cht_bsw_max98090_ti: Fix jack initialization commit 3bbda5a38601f7675a214be2044e41d7749e6c7b upstream. If the ts3a227e audio accessory detection hardware is present and its driver probed, the jack needs to be created before enabling jack detection in the ts3a227e driver. With this patch, the jack is instantiated in the max98090 headset init function if the ts3a227e is present. This fixes a null pointer dereference as the jack detection enabling function in the ts3a driver was called before the jack is created. [minor correction to keep error handling on jack creation the same as before by Pierre Bossart] Signed-off-by: Thierry Escande Signed-off-by: Pierre-Louis Bossart Acked-By: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/cht_bsw_max98090_ti.c | 45 ++++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index 20755ecc7f9e..a02dec251afe 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -116,23 +116,19 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime) struct cht_mc_private *ctx = snd_soc_card_get_drvdata(runtime->card); struct snd_soc_jack *jack = &ctx->jack; - /** - * TI supports 4 butons headset detection - * KEY_MEDIA - * KEY_VOICECOMMAND - * KEY_VOLUMEUP - * KEY_VOLUMEDOWN - */ - if (ctx->ts3a227e_present) - jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE | - SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3; - else - jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE; + if (ctx->ts3a227e_present) { + /* + * The jack has already been created in the + * cht_max98090_headset_init() function. + */ + snd_soc_jack_notifier_register(jack, &cht_jack_nb); + return 0; + } + + jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE; ret = snd_soc_card_jack_new(runtime->card, "Headset Jack", jack_type, jack, NULL, 0); - if (ret) { dev_err(runtime->dev, "Headset Jack creation failed %d\n", ret); return ret; @@ -188,6 +184,27 @@ static int cht_max98090_headset_init(struct snd_soc_component *component) { struct snd_soc_card *card = component->card; struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); + struct snd_soc_jack *jack = &ctx->jack; + int jack_type; + int ret; + + /* + * TI supports 4 butons headset detection + * KEY_MEDIA + * KEY_VOICECOMMAND + * KEY_VOLUMEUP + * KEY_VOLUMEDOWN + */ + jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3; + + ret = snd_soc_card_jack_new(card, "Headset Jack", jack_type, + jack, NULL, 0); + if (ret) { + dev_err(card->dev, "Headset Jack creation failed %d\n", ret); + return ret; + } return ts3a227e_enable_jack_detect(component, &ctx->jack); } -- GitLab From 6e2c702e797c25b49dac3a9f663c449f30cf8efc Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 31 Jul 2018 15:02:13 -0700 Subject: [PATCH 0780/1291] Bluetooth: hidp: buffer overflow in hidp_process_report commit 7992c18810e568b95c869b227137a2215702a805 upstream. CVE-2018-9363 The buffer length is unsigned at all layers, but gets cast to int and checked in hidp_process_report and can lead to a buffer overflow. Switch len parameter to unsigned int to resolve issue. This affects 3.18 and newer kernels. Signed-off-by: Mark Salyzyn Fixes: a4b1b5877b514b276f0f31efe02388a9c2836728 ("HID: Bluetooth: hidp: make sure input buffers are big enough") Cc: Marcel Holtmann Cc: Johan Hedberg Cc: "David S. Miller" Cc: Kees Cook Cc: Benjamin Tissoires Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: security@kernel.org Cc: kernel-team@android.com Acked-by: Kees Cook Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 8112893037bd..cef3754408d4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -431,8 +431,8 @@ static void hidp_del_timer(struct hidp_session *session) del_timer(&session->timer); } -static void hidp_process_report(struct hidp_session *session, - int type, const u8 *data, int len, int intr) +static void hidp_process_report(struct hidp_session *session, int type, + const u8 *data, unsigned int len, int intr) { if (len > HID_MAX_BUFFER_SIZE) len = HID_MAX_BUFFER_SIZE; -- GitLab From a34806961be4b42f3f40c8026eca95bbbbaf78a3 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 27 Jun 2018 08:13:47 -0600 Subject: [PATCH 0781/1291] ioremap: Update pgtable free interfaces with addr commit 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc upstream. The following kernel panic was observed on ARM64 platform due to a stale TLB entry. 1. ioremap with 4K size, a valid pte page table is set. 2. iounmap it, its pte entry is set to 0. 3. ioremap the same address with 2M size, update its pmd entry with a new value. 4. CPU may hit an exception because the old pmd entry is still in TLB, which leads to a kernel panic. Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page table") has addressed this panic by falling to pte mappings in the above case on ARM64. To support pmd mappings in all cases, TLB purge needs to be performed in this case on ARM64. Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page() so that TLB purge can be added later in seprate patches. [toshi.kani@hpe.com: merge changes, rewrite patch description] Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Chintan Pandya Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Will Deacon Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 4 ++-- arch/x86/mm/pgtable.c | 12 +++++++----- include/asm-generic/pgtable.h | 8 ++++---- lib/ioremap.c | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f6b877d2726d..6ac0d32d60a5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -938,12 +938,12 @@ int pmd_clear_huge(pmd_t *pmd) return 1; } -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index debb7b7650b2..31286ff04141 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -716,11 +716,12 @@ int pmd_clear_huge(pmd_t *pmd) /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. + * @addr: Virtual address associated with pud. * * Context: The pud range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd; int i; @@ -731,7 +732,7 @@ int pud_free_pmd_page(pud_t *pud) pmd = (pmd_t *)pud_page_vaddr(*pud); for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i])) + if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) return 0; pud_clear(pud); @@ -743,11 +744,12 @@ int pud_free_pmd_page(pud_t *pud) /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. + * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; @@ -763,7 +765,7 @@ int pmd_free_pte_page(pmd_t *pmd) #else /* !CONFIG_X86_64 */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } @@ -772,7 +774,7 @@ int pud_free_pmd_page(pud_t *pud) * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 46a2f5d9aa25..f00421dfacbd 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -991,8 +991,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -int pud_free_pmd_page(pud_t *pud); -int pmd_free_pte_page(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud, unsigned long addr); +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { @@ -1018,11 +1018,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } -static inline int pud_free_pmd_page(pud_t *pud) +static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; } -static inline int pmd_free_pte_page(pmd_t *pmd) +static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return 0; } diff --git a/lib/ioremap.c b/lib/ioremap.c index 54e5bbaa3200..517f5853ffed 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } -- GitLab From 21ed56ef827e3574ffbe67a933e480b4f6f32cf5 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:48 -0600 Subject: [PATCH 0782/1291] x86/mm: Add TLB purge to free pmd/pte page interfaces commit 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e upstream. ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates a pud / pmd map. The following preconditions are met at their entry. - All pte entries for a target pud/pmd address range have been cleared. - System-wide TLB purges have been peformed for a target pud/pmd address range. The preconditions assure that there is no stale TLB entry for the range. Speculation may not cache TLB entries since it requires all levels of page entries, including ptes, to have P & A-bits set for an associated address. However, speculation may cache pud/pmd entries (paging-structure caches) when they have P-bit set. Add a system-wide TLB purge (INVLPG) to a single page after clearing pud/pmd entry's P-bit. SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches, states that: INVLPG invalidates all paging-structure caches associated with the current PCID regardless of the liner addresses to which they correspond. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-4-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 31286ff04141..2bdb8e8a9d7c 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -718,24 +718,44 @@ int pmd_clear_huge(pmd_t *pmd) * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * - * Context: The pud range has been unmaped and TLB purged. + * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. + * + * NOTE: Callers must allow a single page allocation. */ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { - pmd_t *pmd; + pmd_t *pmd, *pmd_sv; + pte_t *pte; int i; if (pud_none(*pud)) return 1; pmd = (pmd_t *)pud_page_vaddr(*pud); + pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); + if (!pmd_sv) + return 0; - for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) - return 0; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_sv[i] = pmd[i]; + if (!pmd_none(pmd[i])) + pmd_clear(&pmd[i]); + } pud_clear(pud); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd_sv[i])) { + pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); + free_page((unsigned long)pte); + } + } + + free_page((unsigned long)pmd_sv); free_page((unsigned long)pmd); return 1; @@ -746,7 +766,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * - * Context: The pmd range has been unmaped and TLB purged. + * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) @@ -758,6 +778,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + free_page((unsigned long)pte); return 1; -- GitLab From 7251bd56d92540ab75d660e96d4fb1b7131b38da Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Aug 2018 21:01:12 +0200 Subject: [PATCH 0783/1291] Linux 4.14.64 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f3bb9428b3dc..025156791e90 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 63 +SUBLEVEL = 64 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 3f2e4f5dd83478b41ee273a500477cbf20266d23 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Aug 2018 10:27:36 -0700 Subject: [PATCH 0784/1291] x86/speculation/l1tf: Exempt zeroed PTEs from inversion commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream. It turns out that we should *not* invert all not-present mappings, because the all zeroes case is obviously special. clear_page() does not undergo the XOR logic to invert the address bits, i.e. PTE, PMD and PUD entries that have not been individually written will have val=0 and so will trigger __pte_needs_invert(). As a result, {pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones (adjusted by the max PFN mask) instead of zero. A zeroed entry is ok because the page at physical address 0 is reserved early in boot specifically to mitigate L1TF, so explicitly exempt them from the inversion when reading the PFN. Manifested as an unexpected mprotect(..., PROT_NONE) failure when called on a VMA that has VM_PFNMAP and was mmap'd to as something other than PROT_NONE but never used. mprotect() sends the PROT_NONE request down prot_none_walk(), which walks the PTEs to check the PFNs. prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns -EACCES because it thinks mprotect() is trying to adjust a high MMIO address. [ This is a very modified version of Sean's original patch, but all credit goes to Sean for doing this and also pointing out that sometimes the __pte_needs_invert() function only gets the protection bits, not the full eventual pte. But zero remains special even in just protection bits, so that's ok. - Linus ] Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings") Signed-off-by: Sean Christopherson Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Vlastimil Babka Cc: Dave Hansen Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index 44b1203ece12..a0c1525f1b6f 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -4,9 +4,18 @@ #ifndef __ASSEMBLY__ +/* + * A clear pte value is special, and doesn't get inverted. + * + * Note that even users that only pass a pgprot_t (rather + * than a full pte) won't trigger the special zero case, + * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED + * set. So the all zero case really is limited to just the + * cleared page table entry case. + */ static inline bool __pte_needs_invert(u64 val) { - return !(val & _PAGE_PRESENT); + return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ -- GitLab From 4cea13b66144903ae7310331b43e08f7b2d6aadb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Aug 2018 10:48:00 +0200 Subject: [PATCH 0785/1291] Linux 4.14.65 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 025156791e90..7995690ff1aa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 64 +SUBLEVEL = 65 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 64d9b03d082a2b0665ee646459ee60aded8734a0 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 7 Aug 2018 20:03:57 +0300 Subject: [PATCH 0786/1291] dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart() [ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ] The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value can lead to undefined behavior [1]. In order to fix this use a gradual shift of the window with a 'while' loop, similar to what tcp_cwnd_restart() is doing. When comparing delta and RTO there is a minor difference between TCP and DCCP, the last one also invokes dccp_cwnd_restart() and reduces 'cwnd' if delta equals RTO. That case is preserved in this change. [1]: [40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7 [40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int' [40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G W E 4.18.0-rc7.x86_64 #1 ... [40851.377176] Call Trace: [40851.408503] dump_stack+0xf1/0x17b [40851.451331] ? show_regs_print_info+0x5/0x5 [40851.503555] ubsan_epilogue+0x9/0x7c [40851.548363] __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4 [40851.617109] ? __ubsan_handle_load_invalid_value+0x18f/0x18f [40851.686796] ? xfrm4_output_finish+0x80/0x80 [40851.739827] ? lock_downgrade+0x6d0/0x6d0 [40851.789744] ? xfrm4_prepare_output+0x160/0x160 [40851.845912] ? ip_queue_xmit+0x810/0x1db0 [40851.895845] ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] [40851.963530] ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] [40852.029063] dccp_xmit_packet+0x1d3/0x720 [dccp] [40852.086254] dccp_write_xmit+0x116/0x1d0 [dccp] [40852.142412] dccp_sendmsg+0x428/0xb20 [dccp] [40852.195454] ? inet_dccp_listen+0x200/0x200 [dccp] [40852.254833] ? sched_clock+0x5/0x10 [40852.298508] ? sched_clock+0x5/0x10 [40852.342194] ? inet_create+0xdf0/0xdf0 [40852.388988] sock_sendmsg+0xd9/0x160 ... Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 3887bc115762..fc31c02d616c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); u32 cwnd = hc->tx_cwnd, restart_cwnd, iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + s32 delta = now - hc->tx_lsndtime; hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); /* don't reduce cwnd below the initial window (IW) */ restart_cwnd = min(cwnd, iwnd); - cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; - hc->tx_cwnd = max(cwnd, restart_cwnd); + while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) + cwnd >>= 1; + hc->tx_cwnd = max(cwnd, restart_cwnd); hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; -- GitLab From 46be8e4471d7159dc4ce14289d2f244956754ba9 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 10 Aug 2018 11:14:56 -0700 Subject: [PATCH 0787/1291] l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache [ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ] In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a UDP socket. User could call sendmsg() on both this tunnel and the UDP socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call __sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there could be a race and cause the dst cache to be freed multiple times. So we fix l2tp side code to always call sk_dst_check() to garantee xchg() is called when refreshing sk->sk_dst_cache to avoid race conditions. Syzkaller reported stack trace: BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline] BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline] BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline] BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829 CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] atomic_fetch_add_unless include/linux/atomic.h:575 [inline] atomic_add_unless include/linux/atomic.h:597 [inline] dst_hold_safe include/net/dst.h:308 [inline] ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 rt6_get_pcpu_route net/ipv6/route.c:1249 [inline] ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098 fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126 ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117 udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x51d/0x930 net/socket.c:2115 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210 __do_sys_sendmmsg net/socket.c:2239 [inline] __se_sys_sendmmsg net/socket.c:2236 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446a29 Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29 RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003 RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001 Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid") Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Cc: Guillaume Nault Cc: David Ahern Cc: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 316869df91e8..5c87f1d3e525 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1211,7 +1211,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); + skb_dst_set(skb, sk_dst_check(sk, 0)); inet = inet_sk(sk); fl = &inet->cork.fl; -- GitLab From 222e05e0a29a4537575103a0e9118e385e928766 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 7 Aug 2018 12:41:38 -0700 Subject: [PATCH 0788/1291] llc: use refcount_inc_not_zero() for llc_sap_find() [ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ] llc_sap_put() decreases the refcnt before deleting sap from the global list. Therefore, there is a chance llc_sap_find() could find a sap with zero refcnt in this global list. Close this race condition by checking if refcnt is zero or not in llc_sap_find(), if it is zero then it is being removed so we can just treat it as gone. Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/llc.h | 5 +++++ net/llc/llc_core.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/llc.h b/include/net/llc.h index dc35f25eb679..890a87318014 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap) refcount_inc(&sap->refcnt); } +static inline bool llc_sap_hold_safe(struct llc_sap *sap) +{ + return refcount_inc_not_zero(&sap->refcnt); +} + void llc_sap_close(struct llc_sap *sap); static inline void llc_sap_put(struct llc_sap *sap) diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 89041260784c..260b3dc1b4a2 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); - if (sap) - llc_sap_hold(sap); + if (!sap || !llc_sap_hold_safe(sap)) + sap = NULL; rcu_read_unlock_bh(); return sap; } -- GitLab From 4396f5024b9e356c2ba069be8e92e0b113900d49 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Aug 2018 18:44:03 +0800 Subject: [PATCH 0789/1291] net_sched: fix NULL pointer dereference when delete tcindex filter [ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ] Li Shuang reported the following crash: [ 71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0 [ 71.284127] Oops: 0000 [#1] SMP PTI [ 71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex] [ 71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00 [ 71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282 [ 71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e [ 71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800 [ 71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000 [ 71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7 [ 71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600 [ 71.377161] FS: 00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000 [ 71.386188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0 [ 71.400558] Call Trace: [ 71.403299] tcindex_destroy_element+0x25/0x40 [cls_tcindex] [ 71.409611] tcindex_walk+0xbb/0x110 [cls_tcindex] [ 71.414953] tcindex_destroy+0x44/0x90 [cls_tcindex] [ 71.420492] ? tcindex_delete+0x280/0x280 [cls_tcindex] [ 71.426323] tcf_proto_destroy+0x16/0x40 [ 71.430696] tcf_chain_flush+0x51/0x70 [ 71.434876] tcf_block_put_ext.part.30+0x8f/0x1b0 [ 71.440122] tcf_block_put+0x4d/0x70 [ 71.444108] cbq_destroy+0x4d/0xd0 [sch_cbq] [ 71.448869] qdisc_destroy+0x62/0x130 [ 71.452951] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 71.458300] qdisc_destroy+0x62/0x130 [ 71.462373] qdisc_graft+0x3ba/0x470 [ 71.466359] tc_get_qdisc+0x2a6/0x2c0 [ 71.470443] ? cred_has_capability+0x7d/0x130 [ 71.475307] rtnetlink_rcv_msg+0x263/0x2d0 [ 71.479875] ? rtnl_calcit.isra.30+0x110/0x110 [ 71.484832] netlink_rcv_skb+0x4d/0x130 [ 71.489109] netlink_unicast+0x1a3/0x250 [ 71.493482] netlink_sendmsg+0x2ae/0x3a0 [ 71.497859] sock_sendmsg+0x36/0x40 [ 71.501748] ___sys_sendmsg+0x26f/0x2d0 [ 71.506029] ? handle_pte_fault+0x586/0xdf0 [ 71.510694] ? __handle_mm_fault+0x389/0x500 [ 71.515457] ? __sys_sendmsg+0x5e/0xa0 [ 71.519636] __sys_sendmsg+0x5e/0xa0 [ 71.523626] do_syscall_64+0x5b/0x180 [ 71.527711] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 71.533345] RIP: 0033:0x7f9d3e257f10 [ 71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10 [ 71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003 [ 71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003 [ 71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000 [ 71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni [ 71.685425] libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 71.697075] CR2: 0000000000000004 [ 71.700792] ---[ end trace f604eb1acacd978b ]--- Reproducer: tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2 tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64 tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10 tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on tc qdisc add dev lo parent 2:1 pfifo limit 5 tc qdisc del dev lo root This is because in tcindex_set_parms, when there is no old_r, we set new exts to cr.exts. And we didn't set it to filter when r == &new_filter_result. Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer dereference as we didn't init exts. Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check. Then we don't need "cr" as there is no errout after that. Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter") Reported-by: Li Shuang Signed-off-by: Hangbin Liu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a76937ee0b2d..326d373e34c9 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -464,11 +464,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr.res, base); } - if (old_r) - tcf_exts_change(&r->exts, &e); - else - tcf_exts_change(&cr.exts, &e); - if (old_r && old_r != r) { err = tcindex_filter_result_init(old_r); if (err < 0) { @@ -479,6 +474,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, oldp = p; r->res = cr.res; + tcf_exts_change(&r->exts, &e); + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { -- GitLab From 36e55fde4ccf2e4a6e250ce6ded00c7b72fb7a46 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Aug 2018 11:06:02 -0700 Subject: [PATCH 0790/1291] vsock: split dwork to avoid reinitializations [ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ] syzbot reported that we reinitialize an active delayed work in vsock_stream_connect(): ODEBUG: init active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414 WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329 debug_print_object+0x16a/0x210 lib/debugobjects.c:326 The pattern is apparently wrong, we should only initialize the dealyed work once and could repeatly schedule it. So we have to move out the initializations to allocation side. And to avoid confusion, we can split the shared dwork into two, instead of re-using the same one. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Cc: Andy king Cc: Stefan Hajnoczi Cc: Jorgen Hansen Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_vsock.h | 4 ++-- net/vmw_vsock/af_vsock.c | 15 ++++++++------- net/vmw_vsock/vmci_transport.c | 3 +-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f9fb566e75cf..5fb3f6361090 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -62,7 +62,8 @@ struct vsock_sock { struct list_head pending_links; struct list_head accept_queue; bool rejected; - struct delayed_work dwork; + struct delayed_work connect_work; + struct delayed_work pending_work; struct delayed_work close_work; bool close_work_scheduled; u32 peer_shutdown; @@ -75,7 +76,6 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); -void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index dfc8c51e4d74..f2fd556c1233 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -449,14 +449,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode) return transport->shutdown(vsock_sk(sk), mode); } -void vsock_pending_work(struct work_struct *work) +static void vsock_pending_work(struct work_struct *work) { struct sock *sk; struct sock *listener; struct vsock_sock *vsk; bool cleanup; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, pending_work.work); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = true; @@ -496,7 +496,6 @@ void vsock_pending_work(struct work_struct *work) sock_put(sk); sock_put(listener); } -EXPORT_SYMBOL_GPL(vsock_pending_work); /**** SOCKET OPERATIONS ****/ @@ -595,6 +594,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) return retval; } +static void vsock_connect_timeout(struct work_struct *work); + struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, @@ -637,6 +638,8 @@ struct sock *__vsock_create(struct net *net, vsk->sent_request = false; vsk->ignore_connecting_rst = false; vsk->peer_shutdown = 0; + INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); + INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); psk = parent ? vsock_sk(parent) : NULL; if (parent) { @@ -1116,7 +1119,7 @@ static void vsock_connect_timeout(struct work_struct *work) struct vsock_sock *vsk; int cancel = 0; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); lock_sock(sk); @@ -1220,9 +1223,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - INIT_DELAYED_WORK(&vsk->dwork, - vsock_connect_timeout); - schedule_delayed_work(&vsk->dwork, timeout); + schedule_delayed_work(&vsk->connect_work, timeout); /* Skip ahead to preserve error code set above. */ goto out_wait; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 10ae7823a19d..d5be519b0271 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1091,8 +1091,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vpending->listener = sk; sock_hold(sk); sock_hold(pending); - INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); - schedule_delayed_work(&vpending->dwork, HZ); + schedule_delayed_work(&vpending->pending_work, HZ); out: return err; -- GitLab From e125b1cc673adc14290a9c820ab2a27b3a7f75ec Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Aug 2018 18:44:04 +0800 Subject: [PATCH 0791/1291] net_sched: Fix missing res info when create new tc_index filter [ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ] Li Shuang reported the following warn: [ 733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l [ 733.574155] syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84 [ 733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202 [ 733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f [ 733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800 [ 733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000 [ 733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001 [ 733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200 [ 733.681430] FS: 00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000 [ 733.690456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0 [ 733.704826] Call Trace: [ 733.707554] cbq_destroy+0xa1/0xd0 [sch_cbq] [ 733.712318] qdisc_destroy+0x62/0x130 [ 733.716401] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 733.721745] qdisc_destroy+0x62/0x130 [ 733.725829] qdisc_graft+0x3ba/0x470 [ 733.729817] tc_get_qdisc+0x2a6/0x2c0 [ 733.733901] ? cred_has_capability+0x7d/0x130 [ 733.738761] rtnetlink_rcv_msg+0x263/0x2d0 [ 733.743330] ? rtnl_calcit.isra.30+0x110/0x110 [ 733.748287] netlink_rcv_skb+0x4d/0x130 [ 733.752576] netlink_unicast+0x1a3/0x250 [ 733.756949] netlink_sendmsg+0x2ae/0x3a0 [ 733.761324] sock_sendmsg+0x36/0x40 [ 733.765213] ___sys_sendmsg+0x26f/0x2d0 [ 733.769493] ? handle_pte_fault+0x586/0xdf0 [ 733.774158] ? __handle_mm_fault+0x389/0x500 [ 733.778919] ? __sys_sendmsg+0x5e/0xa0 [ 733.783099] __sys_sendmsg+0x5e/0xa0 [ 733.787087] do_syscall_64+0x5b/0x180 [ 733.791171] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 733.796805] RIP: 0033:0x7f9117f23f10 [ 733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10 [ 733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003 [ 733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003 [ 733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000 [ 733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 733.870121] ---[ end trace 28edd4aad712ddca ]--- This is because we didn't update f->result.res when create new filter. Then in tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class(). Fix it by updating f->result.res when create new filter. Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex") Reported-by: Li Shuang Signed-off-by: Hangbin Liu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 326d373e34c9..52829fdc280b 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -482,6 +482,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; + f->result.res = r->res; tcf_exts_change(&f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); -- GitLab From 59f9f2c76fe16026f293d87ae6001f80ccab420b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 8 Aug 2018 11:43:04 +0800 Subject: [PATCH 0792/1291] vhost: reset metadata cache when initializing new IOTLB [ Upstream commit b13f9c6364373a1b9f71e9846dc4fb199296f926 ] We need to reset metadata cache during new IOTLB initialization, otherwise the stale pointers to previous IOTLB may be still accessed which will lead a use after free. Reported-by: syzbot+c51e6736a1bf614b3272@syzkaller.appspotmail.com Fixes: f88949138058 ("vhost: introduce O(1) vq metadata cache") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 244e5256c526..3cf74f54c7a1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1578,9 +1578,12 @@ int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) d->iotlb = niotlb; for (i = 0; i < d->nvqs; ++i) { - mutex_lock(&d->vqs[i]->mutex); - d->vqs[i]->iotlb = niotlb; - mutex_unlock(&d->vqs[i]->mutex); + struct vhost_virtqueue *vq = d->vqs[i]; + + mutex_lock(&vq->mutex); + vq->iotlb = niotlb; + __vhost_vq_meta_reset(vq); + mutex_unlock(&vq->mutex); } vhost_umem_clean(oiotlb); -- GitLab From b3fb9398723238f39e3ff0c9c7f0d1cadf04738a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 5 Aug 2018 22:46:07 +0800 Subject: [PATCH 0793/1291] ip6_tunnel: use the right value for ipv4 min mtu check in ip6_tnl_xmit [ Upstream commit 82a40777de12728dedf4075453b694f0d1baee80 ] According to RFC791, 68 bytes is the minimum size of IPv4 datagram every device must be able to forward without further fragmentation while 576 bytes is the minimum size of IPv4 datagram every device has to be able to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the right value for the ipv4 min mtu check in ip6_tnl_xmit. While at it, change to use max() instead of if statement. Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit") Reported-by: Sabrina Dubroca Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 84ee2eb88121..ee8dbd228fe2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1134,12 +1134,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, max_headroom += 8; mtu -= 8; } - if (skb->protocol == htons(ETH_P_IPV6)) { - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - } else if (mtu < 576) { - mtu = 576; - } + mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? + IPV6_MIN_MTU : IPV4_MIN_MTU); skb_dst_update_pmtu(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { -- GitLab From 50670d158cfa16c2fad5e3c81cf57a0777d450a7 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 8 Aug 2018 14:06:32 +0300 Subject: [PATCH 0794/1291] net: aquantia: Fix IFF_ALLMULTI flag functionality [ Upstream commit 11ba961c916127651e12af6cad3891f8aeb25aa9 ] It was noticed that NIC always pass all multicast traffic to the host regardless of IFF_ALLMULTI flag on the interface. The rule in MC Filter Table in NIC, that is configured to accept any multicast packets, is turning on if IFF_MULTICAST flag is set on the interface. It leads to passing all multicast traffic to the host. This fix changes the condition to turn on that rule by checking IFF_ALLMULTI flag as it should. Fixes: b21f502f84be ("net:ethernet:aquantia: Fix for multicast filter handling.") Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 36fddb199160..f4b3554b0b67 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -752,7 +752,7 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); rpfl2multicast_flr_en_set(self, - IS_FILTER_ENABLED(IFF_MULTICAST), 0); + IS_FILTER_ENABLED(IFF_ALLMULTI), 0); rpfl2_accept_all_mc_packets_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI)); -- GitLab From 7f4c155214ae9b5f2e811d7b0226ae911b667f6d Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sat, 28 Jul 2018 03:16:42 +0900 Subject: [PATCH 0795/1291] ALSA: hda - Sleep for 10ms after entering D3 on Conexant codecs commit f59cf9a0551dd954ad8b752461cf19d9789f4b1d upstream. On rare occasions, we are still noticing that the internal speaker spitting out spurious noises even after adding the problematic codec to the list. Adding a 10ms artificial delay before rebooting fixes the issue entirely. Patch for Realtek codecs also adds the same amount of delay after entering D3. Signed-off-by: Park Ju Hyung Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 88ce2f1022e1..2a140172a972 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -225,6 +225,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); } static void cx_auto_free(struct hda_codec *codec) -- GitLab From f7de68a8d6cd32c1e000a62fe20a7fc529af6851 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sat, 28 Jul 2018 03:16:21 +0900 Subject: [PATCH 0796/1291] ALSA: hda - Turn CX8200 into D3 as well upon reboot commit d77a4b4a5b0b2ebcbc9840995d91311ef28302ab upstream. As an equivalent codec with CX20724, CX8200 is also subject to the reboot bug. Late 2017 and 2018 LG Gram and some HP Spectre laptops are known victims to this issue, causing extremely loud noises upon reboot. Now that we know that this bug is subject to multiple codecs, fix the comment as well. Signed-off-by: Park Ju Hyung Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 2a140172a972..16197ad4512a 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -211,6 +211,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) struct conexant_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x14f12008: /* CX8200 */ case 0x14f150f2: /* CX20722 */ case 0x14f150f4: /* CX20724 */ break; @@ -218,7 +219,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) return; } - /* Turn the CX20722 codec into D3 to avoid spurious noises + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); -- GitLab From f13a825ae2c6122ae53f1f12ed95ac77a7cc9d9b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:10:11 +0200 Subject: [PATCH 0797/1291] ALSA: vx222: Fix invalid endian conversions commit fff71a4c050ba46e305d910c837b99ba1728135e upstream. The endian conversions used in vx2_dma_read() and vx2_dma_write() are superfluous and even wrong on big-endian machines, as inl() and outl() already do conversions. Kill them. Spotted by sparse, a warning like: sound/pci/vx222/vx222_ops.c:278:30: warning: incorrect type in argument 1 (different base types) Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/vx222/vx222_ops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c index d4298af6d3ee..c0d0bf44f365 100644 --- a/sound/pci/vx222/vx222_ops.c +++ b/sound/pci/vx222/vx222_ops.c @@ -275,7 +275,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) { - outl(cpu_to_le32(*addr), port); + outl(*addr, port); addr++; } addr = (u32 *)runtime->dma_area; @@ -285,7 +285,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) { - outl(cpu_to_le32(*addr), port); + outl(*addr, port); addr++; } @@ -313,7 +313,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) - *addr++ = le32_to_cpu(inl(port)); + *addr++ = inl(port); addr = (u32 *)runtime->dma_area; pipe->hw_ptr = 0; } @@ -321,7 +321,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) - *addr++ = le32_to_cpu(inl(port)); + *addr++ = inl(port); vx2_release_pseudo_dma(chip); } -- GitLab From f4253b46437ae26b0feb0482b06d4ffccf97934e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Jul 2018 14:27:59 +0200 Subject: [PATCH 0798/1291] ALSA: virmidi: Fix too long output trigger loop commit 50e9ffb1996a5d11ff5040a266585bad4ceeca0a upstream. The virmidi output trigger tries to parse the all available bytes and process sequencer events as much as possible. In a normal situation, this is supposed to be relatively short, but a program may give a huge buffer and it'll take a long time in a single spin lock, which may eventually lead to a soft lockup. This patch simply adds a workaround, a cond_resched() call in the loop if applicable. A better solution would be to move the event processor into a work, but let's put a duct-tape quickly at first. Reported-and-tested-by: Dae R. Jeong Reported-by: syzbot+619d9f40141d826b097e@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_virmidi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 289ae6bb81d9..8ebbca554e99 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -163,6 +163,7 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, int count, res; unsigned char buf[32], *pbuf; unsigned long flags; + bool check_resched = !in_atomic(); if (up) { vmidi->trigger = 1; @@ -200,6 +201,15 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } } + if (!check_resched) + continue; + /* do temporary unlock & cond_resched() for avoiding + * CPU soft lockup, which may happen via a write from + * a huge rawmidi buffer + */ + spin_unlock_irqrestore(&substream->runtime->lock, flags); + cond_resched(); + spin_lock_irqsave(&substream->runtime->lock, flags); } out: spin_unlock_irqrestore(&substream->runtime->lock, flags); -- GitLab From 6b4af40779cc7ad0709fe657c793d53fe0637df3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:59:26 +0200 Subject: [PATCH 0799/1291] ALSA: cs5535audio: Fix invalid endian conversion commit 69756930f2de0457d51db7d505a1e4f40e9fd116 upstream. One place in cs5535audio_build_dma_packets() does an extra conversion via cpu_to_le32(); namely jmpprd_addr is passed to setup_prd() ops, which writes the value via cs_writel(). That is, the callback does the conversion by itself, and we don't need to convert beforehand. This patch fixes that bogus conversion. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/cs5535audio/cs5535audio.h | 6 +++--- sound/pci/cs5535audio/cs5535audio_pcm.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h index f4fcdf93f3c8..d84620a0c26c 100644 --- a/sound/pci/cs5535audio/cs5535audio.h +++ b/sound/pci/cs5535audio/cs5535audio.h @@ -67,9 +67,9 @@ struct cs5535audio_dma_ops { }; struct cs5535audio_dma_desc { - u32 addr; - u16 size; - u16 ctlreserved; + __le32 addr; + __le16 size; + __le16 ctlreserved; }; struct cs5535audio_dma { diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c index ee7065f6e162..326caec854e1 100644 --- a/sound/pci/cs5535audio/cs5535audio_pcm.c +++ b/sound/pci/cs5535audio/cs5535audio_pcm.c @@ -158,8 +158,8 @@ static int cs5535audio_build_dma_packets(struct cs5535audio *cs5535au, lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr); lastdesc->size = 0; lastdesc->ctlreserved = cpu_to_le16(PRD_JMP); - jmpprd_addr = cpu_to_le32(lastdesc->addr + - (sizeof(struct cs5535audio_dma_desc)*periods)); + jmpprd_addr = (u32)dma->desc_buf.addr + + sizeof(struct cs5535audio_dma_desc) * periods; dma->substream = substream; dma->period_bytes = period_bytes; -- GitLab From fda20e719212b8b5406394b1c30db35402084c82 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Aug 2018 14:04:45 +0200 Subject: [PATCH 0800/1291] ALSA: hda: Correct Asrock B85M-ITX power_save blacklist entry commit 8e82a728792bf66b9f0a29c9d4c4b0630f7b9c79 upstream. I added the subsys product-id for the HDMI HDA device rather then for the PCH one, this commit fixes this. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 62fbdbe74b93..22c13ad6a9ae 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2205,7 +2205,7 @@ static int azx_probe(struct pci_dev *pci, */ static struct snd_pci_quirk power_save_blacklist[] = { /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ - SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), + SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ -- GitLab From 79018e171ae5dd3178f199b7731f82e7dbdefb5f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Jul 2018 11:01:04 +0200 Subject: [PATCH 0801/1291] ALSA: memalloc: Don't exceed over the requested size commit dfef01e150824b0e6da750cacda8958188d29aea upstream. snd_dma_alloc_pages_fallback() tries to allocate pages again when the allocation fails with reduced size. But the first try actually *increases* the size to power-of-two, which may give back a larger chunk than the requested size. This confuses the callers, e.g. sgbuf assumes that the size is equal or less, and it may result in a bad loop due to the underflow and eventually lead to Oops. The code of this function seems incorrectly assuming the usage of get_order(). We need to decrease at first, then align to power-of-two. Reported-and-tested-by: he, bo Reported-by: zhang jun Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/memalloc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 7f89d3c79a4b..753d5fc4b284 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -242,16 +242,12 @@ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size, int err; while ((err = snd_dma_alloc_pages(type, device, size, dmab)) < 0) { - size_t aligned_size; if (err != -ENOMEM) return err; if (size <= PAGE_SIZE) return -ENOMEM; - aligned_size = PAGE_SIZE << get_order(size); - if (size != aligned_size) - size = aligned_size; - else - size >>= 1; + size >>= 1; + size = PAGE_SIZE << get_order(size); } if (! dmab->area) return -ENOMEM; -- GitLab From 33a3444a096af8451919ca49cf31ce7e7c946b34 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:11:38 +0200 Subject: [PATCH 0802/1291] ALSA: vxpocket: Fix invalid endian conversions commit 3acd3e3bab95ec3622ff98da313290ee823a0f68 upstream. The endian conversions used in vxp_dma_read() and vxp_dma_write() are superfluous and even wrong on big-endian machines, as inw() and outw() already do conversions. Kill them. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pcmcia/vx/vxp_ops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pcmcia/vx/vxp_ops.c b/sound/pcmcia/vx/vxp_ops.c index 8cde40226355..4c4ef1fec69f 100644 --- a/sound/pcmcia/vx/vxp_ops.c +++ b/sound/pcmcia/vx/vxp_ops.c @@ -375,7 +375,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) { - outw(cpu_to_le16(*addr), port); + outw(*addr, port); addr++; } addr = (unsigned short *)runtime->dma_area; @@ -385,7 +385,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) { - outw(cpu_to_le16(*addr), port); + outw(*addr, port); addr++; } vx_release_pseudo_dma(chip); @@ -417,7 +417,7 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) - *addr++ = le16_to_cpu(inw(port)); + *addr++ = inw(port); addr = (unsigned short *)runtime->dma_area; pipe->hw_ptr = 0; } @@ -425,12 +425,12 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; count > 1; count--) - *addr++ = le16_to_cpu(inw(port)); + *addr++ = inw(port); /* Disable DMA */ pchip->regDIALOG &= ~VXP_DLG_DMAREAD_SEL_MASK; vx_outb(chip, DIALOG, pchip->regDIALOG); /* Read the last word (16 bits) */ - *addr = le16_to_cpu(inw(port)); + *addr = inw(port); /* Disable 16-bit accesses */ pchip->regDIALOG &= ~VXP_DLG_DMA16_SEL_MASK; vx_outb(chip, DIALOG, pchip->regDIALOG); -- GitLab From d17b9ab7d954db5328b4d30fcad5cdce226824ae Mon Sep 17 00:00:00 2001 From: John Ogness Date: Sun, 24 Jun 2018 00:32:11 +0200 Subject: [PATCH 0803/1291] USB: serial: sierra: fix potential deadlock at close commit e60870012e5a35b1506d7b376fddfb30e9da0b27 upstream. The portdata spinlock can be taken in interrupt context (via sierra_outdat_callback()). Disable interrupts when taking the portdata spinlock when discarding deferred URBs during close to prevent a possible deadlock. Fixes: 014333f77c0b ("USB: sierra: fix urb and memory leak on disconnect") Cc: stable Signed-off-by: John Ogness Signed-off-by: Sebastian Andrzej Siewior [ johan: amend commit message and add fixes and stable tags ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/sierra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 4c4ac4705ac0..a9c5564b6b65 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -773,9 +773,9 @@ static void sierra_close(struct usb_serial_port *port) kfree(urb->transfer_buffer); usb_free_urb(urb); usb_autopm_put_interface_async(serial->interface); - spin_lock(&portdata->lock); + spin_lock_irq(&portdata->lock); portdata->outstanding_urbs--; - spin_unlock(&portdata->lock); + spin_unlock_irq(&portdata->lock); } sierra_stop_rx_urbs(port); -- GitLab From 925bce3815d8ed028944b3f758cf8edd14524431 Mon Sep 17 00:00:00 2001 From: Movie Song Date: Thu, 19 Jul 2018 02:20:48 +0800 Subject: [PATCH 0804/1291] USB: serial: pl2303: add a new device id for ATEN commit 29c692c96b3a39cd1911fb79cd2505af8d070f07 upstream. Signed-off-by: Movie Song Cc: Johan Hovold Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 2 ++ drivers/usb/serial/pl2303.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 34c5a75f98a7..2153e67eeeee 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -55,6 +55,8 @@ static const struct usb_device_id id_table[] = { .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC232B), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 123289085ee2..cec7141245ef 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -29,6 +29,7 @@ #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 #define ATEN_PRODUCT_UC485 0x2021 +#define ATEN_PRODUCT_UC232B 0x2022 #define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb -- GitLab From 60d93a4b6d32b7e051715c16c0e9b848aef42581 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Tue, 24 Jul 2018 01:34:01 +0200 Subject: [PATCH 0805/1291] USB: option: add support for DW5821e commit 7bab01ecc6c43da882333c6db39741cb43677004 upstream. The device exposes AT, NMEA and DIAG ports in both USB configurations. The patch explicitly ignores interfaces 0 and 1, as they're bound to other drivers already; and also interface 6, which is a GNSS interface for which we don't have a driver yet. T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 18 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d4124551fb56..0600dadd6a0c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -199,6 +199,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */ #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ +#define DELL_PRODUCT_5821E 0x81d7 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1033,6 +1035,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, -- GitLab From 21be9327e00c5085d447dc904c3eec8b69cddf4d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jul 2018 14:03:55 +0200 Subject: [PATCH 0806/1291] ACPI / PM: save NVS memory for ASUS 1025C laptop commit 231f9415001138a000cd0f881c46654b7ea3f8c5 upstream. Every time I tried to upgrade my laptop from 3.10.x to 4.x I faced an issue by which the fan would run at full speed upon resume. Bisecting it showed me the issue was introduced in 3.17 by commit 821d6f0359b0 (ACPI / sleep: Do not save NVS for new machines to accelerate S3). This code only affects machines built starting as of 2012, but this Asus 1025C laptop was made in 2012 and apparently needs the NVS data to be saved, otherwise the CPU's thermal state is not properly reported on resume and the fan runs at full speed upon resume. Here's a very simple way to check if such a machine is affected : # cat /sys/class/thermal/thermal_zone0/temp 55000 ( now suspend, wait one second and resume ) # cat /sys/class/thermal/thermal_zone0/temp 0 (and after ~15 seconds the fan starts to spin) Let's apply the same quirk as commit cbc00c13 (ACPI: save NVS memory for Lenovo G50-45) and reuse the function it provides. Note that this commit was already backported to 4.9.x but not 4.4.x. Cc: 3.17+ # 3.17+: requires cbc00c13 Signed-off-by: Willy Tarreau Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 2ef0ad6a33d6..7a0af16f86f2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -338,6 +338,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Asus 1025C", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "1025C"), + }, + }, /* * https://bugzilla.kernel.org/show_bug.cgi?id=189431 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory -- GitLab From 71cc9159212b6aa076f6b731d557dccc668ec9cc Mon Sep 17 00:00:00 2001 From: Mark Date: Sun, 12 Aug 2018 11:47:16 -0400 Subject: [PATCH 0807/1291] tty: serial: 8250: Revert NXP SC16C2552 workaround commit 47ac76662ca9c5852fd353093f19de3ae85f2e66 upstream. Revert commit ecb988a3b7985913d1f0112f66667cdd15e40711: tty: serial: 8250: 8250_core: NXP SC16C2552 workaround The above commit causes userland application to no longer write correctly its first write to a dumb terminal connected to /dev/ttyS0. This commit seems to be the culprit. It's as though the TX FIFO is being reset during that write. What should be displayed is: PSW 80000000 INST 00000000 HALT // What is displayed is some variation of: T 00000000 HAL// Reverting this commit via this patch fixes my problem. Signed-off-by: Mark Hounschell Fixes: ecb988a3b798 ("tty: serial: 8250: 8250_core: NXP SC16C2552 workaround") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index be456ea27ab2..ecf3d631bc09 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -94,8 +94,7 @@ static const struct serial8250_config uart_config[] = { .name = "16550A", .fifo_size = 16, .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, -- GitLab From da93a03fdd0a40ed91568434fade58d9043be20e Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Tue, 24 Jul 2018 14:23:46 -0500 Subject: [PATCH 0808/1291] serial: 8250_exar: Read INT0 from slave device, too commit 60ab0fafc4b652fcaf7cbc3bb8555a0cf1149c28 upstream. The sleep wake-up refactoring that I introduced in commit c7e1b4059075 ("tty: serial: exar: Relocate sleep wake-up handling") did not account for devices with a slave device on the expansion port. This patch pokes the INT0 register in the slave device, if present, in order to ensure that MSI interrupts don't get permanently "stuck" because of a sleep wake-up interrupt as described here: commit 2c0ac5b48a35 ("serial: exar: Fix stuck MSIs") This also converts an ioread8() to readb() in order to provide visual consistency with the MMIO-only accessors used elsewhere in the driver. Reported-by: Andy Shevchenko Signed-off-by: Aaron Sierra Fixes: c7e1b4059075 ("tty: serial: exar: Relocate sleep wake-up handling") Reviewed-by: Andy Shevchenko Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index e0aa5f03004c..411b4b03457b 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -436,7 +436,11 @@ static irqreturn_t exar_misc_handler(int irq, void *data) struct exar8250 *priv = data; /* Clear all PCI interrupts by reading INT0. No effect on IIR */ - ioread8(priv->virt + UART_EXAR_INT0); + readb(priv->virt + UART_EXAR_INT0); + + /* Clear INT0 for Expansion Interface slave ports, too */ + if (priv->board->num_ports > 8) + readb(priv->virt + 0x2000 + UART_EXAR_INT0); return IRQ_HANDLED; } -- GitLab From f6790793d80d6d5f139436e3401bfde0436e8eee Mon Sep 17 00:00:00 2001 From: Chen Hu Date: Fri, 27 Jul 2018 18:32:41 +0800 Subject: [PATCH 0809/1291] serial: 8250_dw: always set baud rate in dw8250_set_termios commit dfcab6ba573445c703235ab6c83758eec12d7f28 upstream. dw8250_set_termios() doesn't set baud rate if the arg "old ktermios" is NULL. This happens during resume. Call Trace: ... [ 54.928108] dw8250_set_termios+0x162/0x170 [ 54.928114] serial8250_set_termios+0x17/0x20 [ 54.928117] uart_change_speed+0x64/0x160 [ 54.928119] uart_resume_port ... So the baud rate is not restored after S3 and breaks the apps who use UART, for example, console and bluetooth etc. We address this issue by setting the baud rate irrespective of arg "old", just like the drivers for other 8250 IPs. This is tested with Intel Broxton platform. Signed-off-by: Chen Hu Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms") Cc: Heikki Krogerus Cc: stable Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 3015789265dd..3fd0cdce723d 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -260,7 +260,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, long rate; int ret; - if (IS_ERR(d->clk) || !old) + if (IS_ERR(d->clk)) goto out; clk_disable_unprepare(d->clk); -- GitLab From 3620bc1ceb145fe34f9a585814ab18d34851798b Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Sat, 28 Jul 2018 20:55:15 +0530 Subject: [PATCH 0810/1291] serial: 8250_dw: Add ACPI support for uart on Broadcom SoC commit 784c29eda5b4e28c3a56aa90b3815f9a1b0cfdc1 upstream. Add ACPI identifier HID for UART DW 8250 on Broadcom SoCs to match the HID passed through ACPI tables to enable UART controller. Signed-off-by: Srinath Mannam Reviewed-by: Vladimir Olovyannikov Tested-by: Vladimir Olovyannikov Reviewed-by: Andy Shevchenko Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 3fd0cdce723d..27c5b2b46b8d 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -672,6 +672,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "APMC0D08", 0}, { "AMD0020", 0 }, { "AMDI0020", 0 }, + { "BRCM2032", 0 }, { "HISI0031", 0 }, { }, }; -- GitLab From 31e4f8ba01731ea34d57ff10a2ffc8f096160e0e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jul 2018 12:05:47 +0200 Subject: [PATCH 0811/1291] misc: sram: fix resource leaks in probe error path commit f294d00961d1d869ecffa60e280eeeee1ccf9a49 upstream. Make sure to disable clocks and deregister any exported partitions before returning on late probe errors. Note that since commit ee895ccdf776 ("misc: sram: fix enabled clock leak on error path"), partitions are deliberately exported before enabling the clock so we stick to that logic here. A follow up patch will address this. Cc: stable # 4.9 Cc: Alexandre Belloni Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sram.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c index fc0415771c00..4dd0d868ff88 100644 --- a/drivers/misc/sram.c +++ b/drivers/misc/sram.c @@ -407,13 +407,20 @@ static int sram_probe(struct platform_device *pdev) if (init_func) { ret = init_func(); if (ret) - return ret; + goto err_disable_clk; } dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n", gen_pool_size(sram->pool) / 1024, sram->virt_base); return 0; + +err_disable_clk: + if (sram->clk) + clk_disable_unprepare(sram->clk); + sram_free_partitions(sram); + + return ret; } static int sram_remove(struct platform_device *pdev) -- GitLab From f276e2efb66118e3fd98e38a7903b204c85e4cec Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 15 Jul 2018 20:36:50 +0100 Subject: [PATCH 0812/1291] Bluetooth: avoid killing an already killed socket commit 4e1a720d0312fd510699032c7694a362a010170f upstream. slub debug reported: [ 440.648642] ============================================================================= [ 440.648649] BUG kmalloc-1024 (Tainted: G BU O ): Poison overwritten [ 440.648651] ----------------------------------------------------------------------------- [ 440.648655] INFO: 0xe70f4bec-0xe70f4bec. First byte 0x6a instead of 0x6b [ 440.648665] INFO: Allocated in sk_prot_alloc+0x6b/0xc6 age=33155 cpu=1 pid=1047 [ 440.648671] ___slab_alloc.constprop.24+0x1fc/0x292 [ 440.648675] __slab_alloc.isra.18.constprop.23+0x1c/0x25 [ 440.648677] __kmalloc+0xb6/0x17f [ 440.648680] sk_prot_alloc+0x6b/0xc6 [ 440.648683] sk_alloc+0x1e/0xa1 [ 440.648700] sco_sock_alloc.constprop.6+0x26/0xaf [bluetooth] [ 440.648716] sco_connect_cfm+0x166/0x281 [bluetooth] [ 440.648731] hci_conn_request_evt.isra.53+0x258/0x281 [bluetooth] [ 440.648746] hci_event_packet+0x28b/0x2326 [bluetooth] [ 440.648759] hci_rx_work+0x161/0x291 [bluetooth] [ 440.648764] process_one_work+0x163/0x2b2 [ 440.648767] worker_thread+0x1a9/0x25c [ 440.648770] kthread+0xf8/0xfd [ 440.648774] ret_from_fork+0x2e/0x38 [ 440.648779] INFO: Freed in __sk_destruct+0xd3/0xdf age=3815 cpu=1 pid=1047 [ 440.648782] __slab_free+0x4b/0x27a [ 440.648784] kfree+0x12e/0x155 [ 440.648787] __sk_destruct+0xd3/0xdf [ 440.648790] sk_destruct+0x27/0x29 [ 440.648793] __sk_free+0x75/0x91 [ 440.648795] sk_free+0x1c/0x1e [ 440.648810] sco_sock_kill+0x5a/0x5f [bluetooth] [ 440.648825] sco_conn_del+0x8e/0xba [bluetooth] [ 440.648840] sco_disconn_cfm+0x3a/0x41 [bluetooth] [ 440.648855] hci_event_packet+0x45e/0x2326 [bluetooth] [ 440.648868] hci_rx_work+0x161/0x291 [bluetooth] [ 440.648872] process_one_work+0x163/0x2b2 [ 440.648875] worker_thread+0x1a9/0x25c [ 440.648877] kthread+0xf8/0xfd [ 440.648880] ret_from_fork+0x2e/0x38 [ 440.648884] INFO: Slab 0xf4718580 objects=27 used=27 fp=0x (null) flags=0x40008100 [ 440.648886] INFO: Object 0xe70f4b88 @offset=19336 fp=0xe70f54f8 When KASAN was enabled, it reported: [ 210.096613] ================================================================== [ 210.096634] BUG: KASAN: use-after-free in ex_handler_refcount+0x5b/0x127 [ 210.096641] Write of size 4 at addr ffff880107e17160 by task kworker/u9:1/2040 [ 210.096651] CPU: 1 PID: 2040 Comm: kworker/u9:1 Tainted: G U O 4.14.47-20180606+ #2 [ 210.096654] Hardware name: , BIOS 2017.01-00087-g43e04de 08/30/2017 [ 210.096693] Workqueue: hci0 hci_rx_work [bluetooth] [ 210.096698] Call Trace: [ 210.096711] dump_stack+0x46/0x59 [ 210.096722] print_address_description+0x6b/0x23b [ 210.096729] ? ex_handler_refcount+0x5b/0x127 [ 210.096736] kasan_report+0x220/0x246 [ 210.096744] ex_handler_refcount+0x5b/0x127 [ 210.096751] ? ex_handler_clear_fs+0x85/0x85 [ 210.096757] fixup_exception+0x8c/0x96 [ 210.096766] do_trap+0x66/0x2c1 [ 210.096773] do_error_trap+0x152/0x180 [ 210.096781] ? fixup_bug+0x78/0x78 [ 210.096817] ? hci_debugfs_create_conn+0x244/0x26a [bluetooth] [ 210.096824] ? __schedule+0x113b/0x1453 [ 210.096830] ? sysctl_net_exit+0xe/0xe [ 210.096837] ? __wake_up_common+0x343/0x343 [ 210.096843] ? insert_work+0x107/0x163 [ 210.096850] invalid_op+0x1b/0x40 [ 210.096888] RIP: 0010:hci_debugfs_create_conn+0x244/0x26a [bluetooth] [ 210.096892] RSP: 0018:ffff880094a0f970 EFLAGS: 00010296 [ 210.096898] RAX: 0000000000000000 RBX: ffff880107e170e8 RCX: ffff880107e17160 [ 210.096902] RDX: 000000000000002f RSI: ffff88013b80ed40 RDI: ffffffffa058b940 [ 210.096906] RBP: ffff88011b2b0578 R08: 00000000852f0ec9 R09: ffffffff81cfcf9b [ 210.096909] R10: 00000000d21bdad7 R11: 0000000000000001 R12: ffff8800967b0488 [ 210.096913] R13: ffff880107e17168 R14: 0000000000000068 R15: ffff8800949c0008 [ 210.096920] ? __sk_destruct+0x2c6/0x2d4 [ 210.096959] hci_event_packet+0xff5/0x7de2 [bluetooth] [ 210.096969] ? __local_bh_enable_ip+0x43/0x5b [ 210.097004] ? l2cap_sock_recv_cb+0x158/0x166 [bluetooth] [ 210.097039] ? hci_le_meta_evt+0x2bb3/0x2bb3 [bluetooth] [ 210.097075] ? l2cap_ertm_init+0x94e/0x94e [bluetooth] [ 210.097093] ? xhci_urb_enqueue+0xbd8/0xcf5 [xhci_hcd] [ 210.097102] ? __accumulate_pelt_segments+0x24/0x33 [ 210.097109] ? __accumulate_pelt_segments+0x24/0x33 [ 210.097115] ? __update_load_avg_se.isra.2+0x217/0x3a4 [ 210.097122] ? set_next_entity+0x7c3/0x12cd [ 210.097128] ? pick_next_entity+0x25e/0x26c [ 210.097135] ? pick_next_task_fair+0x2ca/0xc1a [ 210.097141] ? switch_mm_irqs_off+0x346/0xb4f [ 210.097147] ? __switch_to+0x769/0xbc4 [ 210.097153] ? compat_start_thread+0x66/0x66 [ 210.097188] ? hci_conn_check_link_mode+0x1cd/0x1cd [bluetooth] [ 210.097195] ? finish_task_switch+0x392/0x431 [ 210.097228] ? hci_rx_work+0x154/0x487 [bluetooth] [ 210.097260] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097269] process_one_work+0x579/0x9e9 [ 210.097277] worker_thread+0x68f/0x804 [ 210.097285] kthread+0x31c/0x32b [ 210.097292] ? rescuer_thread+0x70c/0x70c [ 210.097299] ? kthread_create_on_node+0xa3/0xa3 [ 210.097306] ret_from_fork+0x35/0x40 [ 210.097314] Allocated by task 2040: [ 210.097323] kasan_kmalloc.part.1+0x51/0xc7 [ 210.097328] __kmalloc+0x17f/0x1b6 [ 210.097335] sk_prot_alloc+0xf2/0x1a3 [ 210.097340] sk_alloc+0x22/0x297 [ 210.097375] sco_sock_alloc.constprop.7+0x23/0x202 [bluetooth] [ 210.097410] sco_connect_cfm+0x2d0/0x566 [bluetooth] [ 210.097443] hci_conn_request_evt.isra.53+0x6d3/0x762 [bluetooth] [ 210.097476] hci_event_packet+0x85e/0x7de2 [bluetooth] [ 210.097507] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097512] process_one_work+0x579/0x9e9 [ 210.097517] worker_thread+0x68f/0x804 [ 210.097523] kthread+0x31c/0x32b [ 210.097529] ret_from_fork+0x35/0x40 [ 210.097533] Freed by task 2040: [ 210.097539] kasan_slab_free+0xb3/0x15e [ 210.097544] kfree+0x103/0x1a9 [ 210.097549] __sk_destruct+0x2c6/0x2d4 [ 210.097584] sco_conn_del.isra.1+0xba/0x10e [bluetooth] [ 210.097617] hci_event_packet+0xff5/0x7de2 [bluetooth] [ 210.097648] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097653] process_one_work+0x579/0x9e9 [ 210.097658] worker_thread+0x68f/0x804 [ 210.097663] kthread+0x31c/0x32b [ 210.097670] ret_from_fork+0x35/0x40 [ 210.097676] The buggy address belongs to the object at ffff880107e170e8 which belongs to the cache kmalloc-1024 of size 1024 [ 210.097681] The buggy address is located 120 bytes inside of 1024-byte region [ffff880107e170e8, ffff880107e174e8) [ 210.097683] The buggy address belongs to the page: [ 210.097689] page:ffffea00041f8400 count:1 mapcount:0 mapping: (null) index:0xffff880107e15b68 compound_mapcount: 0 [ 210.110194] flags: 0x8000000000008100(slab|head) [ 210.115441] raw: 8000000000008100 0000000000000000 ffff880107e15b68 0000000100170016 [ 210.115448] raw: ffffea0004a47620 ffffea0004b48e20 ffff88013b80ed40 0000000000000000 [ 210.115451] page dumped because: kasan: bad access detected [ 210.115454] Memory state around the buggy address: [ 210.115460] ffff880107e17000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 210.115465] ffff880107e17080: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb [ 210.115469] >ffff880107e17100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115472] ^ [ 210.115477] ffff880107e17180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115481] ffff880107e17200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115483] ================================================================== And finally when BT_DBG() and ftrace was enabled it showed: <...>-14979 [001] .... 186.104191: sco_sock_kill <-sco_sock_close <...>-14979 [001] .... 186.104191: sco_sock_kill <-sco_sock_release <...>-14979 [001] .... 186.104192: sco_sock_kill: sk ef0497a0 state 9 <...>-14979 [001] .... 186.104193: bt_sock_unlink <-sco_sock_kill kworker/u9:2-792 [001] .... 186.104246: sco_sock_kill <-sco_conn_del kworker/u9:2-792 [001] .... 186.104248: sco_sock_kill: sk ef0497a0 state 9 kworker/u9:2-792 [001] .... 186.104249: bt_sock_unlink <-sco_sock_kill kworker/u9:2-792 [001] .... 186.104250: sco_sock_destruct <-__sk_destruct kworker/u9:2-792 [001] .... 186.104250: sco_sock_destruct: sk ef0497a0 kworker/u9:2-792 [001] .... 186.104860: hci_conn_del <-hci_event_packet kworker/u9:2-792 [001] .... 186.104864: hci_conn_del: hci0 hcon ef0484c0 handle 266 Only in the failed case, sco_sock_kill() gets called with the same sock pointer two times. Add a check for SOCK_DEAD to avoid continue killing a socket which has already been killed. Signed-off-by: Sudip Mukherjee Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/sco.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 795e920a3281..81fe3949c158 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -393,7 +393,8 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || + sock_flag(sk, SOCK_DEAD)) return; BT_DBG("sk %p state %d", sk, sk->sk_state); -- GitLab From 2b2cdec98e035b57b0374418fd5e3ba1610c9f10 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 15 Aug 2018 12:14:05 -0700 Subject: [PATCH 0813/1291] isdn: Disable IIOCDBGVAR [ Upstream commit 5e22002aa8809e2efab2da95855f73f63e14a36c ] It was possible to directly leak the kernel address where the isdn_dev structure pointer was stored. This is a kernel ASLR bypass for anyone with access to the ioctl. The code had been present since the beginning of git history, though this shouldn't ever be needed for normal operation, therefore remove it. Reported-by: Al Viro Cc: Karsten Keil Signed-off-by: Kees Cook Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_common.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 38a5bb764c7b..598724ffde4e 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1640,13 +1640,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg) } else return -EINVAL; case IIOCDBGVAR: - if (arg) { - if (copy_to_user(argp, &dev, sizeof(ulong))) - return -EFAULT; - return 0; - } else - return -EINVAL; - break; + return -EINVAL; default: if ((cmd & IIOCDRVCTL) == IIOCDRVCTL) cmd = ((cmd >> _IOC_NRSHIFT) & _IOC_NRMASK) & ISDN_DRVIOCTL_MASK; -- GitLab From 178742867e4f5f2edefeff69cfeb9b6a24ea9ece Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 14 Aug 2018 17:28:26 +0800 Subject: [PATCH 0814/1291] cls_matchall: fix tcf_unbind_filter missing [ Upstream commit a51c76b4dfb30496dc65396a957ef0f06af7fb22 ] Fix tcf_unbind_filter missing in cls_matchall as this will trigger WARN_ON() in cbq_destroy_class(). Fixes: fd62d9f5c575f ("net/sched: matchall: Fix configuration race") Reported-by: Li Shuang Signed-off-by: Hangbin Liu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_matchall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 3684153cd8a9..6499aecfbfc4 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -112,6 +112,8 @@ static void mall_destroy(struct tcf_proto *tp) if (!head) return; + tcf_unbind_filter(tp, &head->res); + if (tc_should_offload(dev, head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); -- GitLab From c5f7d3c4daf14ba3717fcc1497854d8c365bd742 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Aug 2018 07:46:11 +0200 Subject: [PATCH 0815/1291] Linux 4.14.66 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7995690ff1aa..e69d0d091742 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 65 +SUBLEVEL = 66 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 4bc32484df6a3bdc173301adff0c2fdc66667a40 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 2 Aug 2018 00:03:40 -0400 Subject: [PATCH 0816/1291] ext4: fix spectre gadget in ext4_mb_regular_allocator() commit 1a5d5e5d51e75a5bca67dadbcea8c841934b7b85 upstream. 'ac->ac_g_ex.fe_len' is a user-controlled value which is used in the derivation of 'ac->ac_2order'. 'ac->ac_2order', in turn, is used to index arrays which makes it a potential spectre gadget. Fix this by sanitizing the value assigned to 'ac->ac2_order'. This covers the following accesses found with the help of smatch: * fs/ext4/mballoc.c:1896 ext4_mb_simple_scan_group() warn: potential spectre issue 'grp->bb_counters' [w] (local cap) * fs/ext4/mballoc.c:445 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_offsets' [r] (local cap) * fs/ext4/mballoc.c:446 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_maxs' [r] (local cap) Suggested-by: Josh Poimboeuf Signed-off-by: Jeremy Cline Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 048c586d9a8b..1792999eec91 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2152,7 +2153,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * This should tell if fe_len is exactly power of 2 */ if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) - ac->ac_2order = i - 1; + ac->ac_2order = array_index_nospec(i - 1, + sb->s_blocksize_bits + 2); } /* if stream allocation is enabled, use global goal */ -- GitLab From b518e870f1f605536b544d81e52f4570bc9e340f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 2 Aug 2018 22:40:19 -0500 Subject: [PATCH 0817/1291] drm/i915/kvmgt: Fix potential Spectre v1 commit de5372da605d3bca46e3102bab51b7e1c0e0a6f6 upstream. info.index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/gpu/drm/i915/gvt/kvmgt.c:1232 intel_vgpu_ioctl() warn: potential spectre issue 'vgpu->vdev.region' [r] Fix this by sanitizing info.index before indirectly using it to index vgpu->vdev.region Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Zhenyu Wang Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/kvmgt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 9bf4045cd679..73c672fc17c4 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -42,6 +42,8 @@ #include #include +#include + #include "i915_drv.h" #include "gvt.h" @@ -953,7 +955,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - int i, ret; + unsigned int i; + int ret; struct vfio_region_info_cap_sparse_mmap *sparse = NULL; size_t size; int nr_areas = 1; @@ -1030,6 +1033,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) return -EINVAL; + info.index = + array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; -- GitLab From 96de65df193607b81ba7323861d58ff7efd3bf38 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Aug 2018 16:14:26 +0200 Subject: [PATCH 0818/1291] EDAC: Add missing MEM_LRDDR4 entry in edac_mem_types[] commit b748f2de4b2f578599f46c6000683a8da755bf68 upstream. The edac_mem_types[] array misses a MEM_LRDDR4 entry, which leads to NULL pointer dereference when accessed via sysfs or such. Signed-off-by: Takashi Iwai Cc: Mauro Carvalho Chehab Cc: Yazen Ghannam Cc: linux-edac Cc: Link: http://lkml.kernel.org/r/20180810141426.8918-1-tiwai@suse.de Fixes: 1e8096bb2031 ("EDAC: Add LRDDR4 DRAM type") Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 480072139b7a..80801c616395 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -215,6 +215,7 @@ const char * const edac_mem_types[] = { [MEM_LRDDR3] = "Load-Reduced DDR3 RAM", [MEM_DDR4] = "Unbuffered DDR4 RAM", [MEM_RDDR4] = "Registered DDR4 RAM", + [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); -- GitLab From 4aec7c283904207d6dc2bd3a319d4e77ce60c77c Mon Sep 17 00:00:00 2001 From: Matthijs van Duin Date: Thu, 19 Jul 2018 10:43:46 +0200 Subject: [PATCH 0819/1291] pty: fix O_CLOEXEC for TIOCGPTPEER commit 36ecc1481dc8d8c52d43ba18c6b642c1d2fde789 upstream. It was being ignored because the flags were not passed to fd allocation. Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Signed-off-by: Matthijs van Duin Acked-by: Aleksa Sarai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 899e8fe5e00f..9e26c530d2dd 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -625,7 +625,7 @@ int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) if (tty->driver != ptm_driver) return -EIO; - fd = get_unused_fd_flags(0); + fd = get_unused_fd_flags(flags); if (fd < 0) { retval = fd; goto err; -- GitLab From 67c0f84827e123953b8baac9de20fe039b5c83fd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:47 +0530 Subject: [PATCH 0820/1291] arm: dts: armada: Fix "#cooling-cells" property's name [ Upstream commit ac62cc9d9cd6fa4c79e171c13dc8d58c3862b678 ] It should be "#cooling-cells" instead of "cooling-cells". Fix it. Signed-off-by: Viresh Kumar Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-385-synology-ds116.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts index 31510eb56f10..874189b4d218 100644 --- a/arch/arm/boot/dts/armada-385-synology-ds116.dts +++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts @@ -170,7 +170,7 @@ gpio-fan { 3700 5 3900 6 4000 7>; - cooling-cells = <2>; + #cooling-cells = <2>; }; gpio-leds { -- GitLab From 7dcffd98360a1f0e953eb2aa86976032d83f90dd Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 2 May 2018 09:25:59 +0200 Subject: [PATCH 0821/1291] vfio: ccw: fix error return in vfio_ccw_sch_event [ Upstream commit 2c861d89ccda2fbcea9358eff9cc5f8fae548be5 ] If the device has not been registered, or there is work pending, we should reschedule a sch_event call again. Signed-off-by: Dong Jia Shi Message-Id: <20180502072559.50691-1-bjsdjshi@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/vfio_ccw_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 82f05c4b8c52..ae7a49ade414 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -176,6 +176,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); unsigned long flags; + int rc = -EAGAIN; spin_lock_irqsave(sch->lock, flags); if (!device_is_registered(&sch->dev)) @@ -186,6 +187,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (cio_update_schib(sch)) { vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + rc = 0; goto out_unlock; } @@ -194,11 +196,12 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) private->state = private->mdev ? VFIO_CCW_STATE_IDLE : VFIO_CCW_STATE_STANDBY; } + rc = 0; out_unlock: spin_unlock_irqrestore(sch->lock, flags); - return 0; + return rc; } static struct css_device_id vfio_ccw_sch_ids[] = { -- GitLab From d4108ed2a57fdbbedd99ebfac264d560e2e5eb73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:05 +0200 Subject: [PATCH 0822/1291] perf tools: Fix error index for pmu event parser [ Upstream commit f7fa827f5f432a0b1f34e10fc49da93aeef9f817 ] For events we provide specific error message we need to set error column index, PMU parser is missing that, adding it. Before: $ perf stat -e cycles,krava/cycles/ kill event syntax error: 'cycles,krava/cycles/' \___ Cannot find PMU `krava'. Missing kernel support? After: $ perf stat -e cycles,krava/cycles/ kill event syntax error: 'cycles,krava/cycles/' \___ Cannot find PMU `krava'. Missing kernel support? Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/parse-events.y | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 988310cd3049..b6115cbdf842 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -226,11 +226,16 @@ event_def: event_pmu | event_pmu: PE_NAME opt_pmu_config { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list, *orig_terms, *terms; if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; + if (error) + error->idx = @1.first_column; + ALLOC_LIST(list); if (parse_events_add_pmu(_parse_state, list, $1, $2)) { struct perf_pmu *pmu = NULL; -- GitLab From 048f62f3725d30467d57b9a76e93a19c01af56b2 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 5 Jun 2018 10:34:54 -0700 Subject: [PATCH 0823/1291] Input: synaptics-rmi4 - fix axis-swap behavior [ Upstream commit 645a397d325db6e1bb36588095ae637738b37693 ] The documentation for the touchscreen-swapped-x-y property states that swapping is done after inverting if both are used. RMI4 did it the other way around, leading to inconsistent behavior with regard to other touchscreens. Signed-off-by: Lucas Stach Tested-by: Nick Dyer Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_2d_sensor.c | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/input/rmi4/rmi_2d_sensor.c b/drivers/input/rmi4/rmi_2d_sensor.c index 8bb866c7b985..8eeffa066022 100644 --- a/drivers/input/rmi4/rmi_2d_sensor.c +++ b/drivers/input/rmi4/rmi_2d_sensor.c @@ -32,15 +32,15 @@ void rmi_2d_sensor_abs_process(struct rmi_2d_sensor *sensor, if (obj->type == RMI_2D_OBJECT_NONE) return; - if (axis_align->swap_axes) - swap(obj->x, obj->y); - if (axis_align->flip_x) obj->x = sensor->max_x - obj->x; if (axis_align->flip_y) obj->y = sensor->max_y - obj->y; + if (axis_align->swap_axes) + swap(obj->x, obj->y); + /* * Here checking if X offset or y offset are specified is * redundant. We just add the offsets or clip the values. @@ -120,15 +120,15 @@ void rmi_2d_sensor_rel_report(struct rmi_2d_sensor *sensor, int x, int y) x = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)x)); y = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)y)); - if (axis_align->swap_axes) - swap(x, y); - if (axis_align->flip_x) x = min(RMI_2D_REL_POS_MAX, -x); if (axis_align->flip_y) y = min(RMI_2D_REL_POS_MAX, -y); + if (axis_align->swap_axes) + swap(x, y); + if (x || y) { input_report_rel(sensor->input, REL_X, x); input_report_rel(sensor->input, REL_Y, y); @@ -141,17 +141,10 @@ static void rmi_2d_sensor_set_input_params(struct rmi_2d_sensor *sensor) struct input_dev *input = sensor->input; int res_x; int res_y; + int max_x, max_y; int input_flags = 0; if (sensor->report_abs) { - if (sensor->axis_align.swap_axes) { - swap(sensor->max_x, sensor->max_y); - swap(sensor->axis_align.clip_x_low, - sensor->axis_align.clip_y_low); - swap(sensor->axis_align.clip_x_high, - sensor->axis_align.clip_y_high); - } - sensor->min_x = sensor->axis_align.clip_x_low; if (sensor->axis_align.clip_x_high) sensor->max_x = min(sensor->max_x, @@ -163,14 +156,19 @@ static void rmi_2d_sensor_set_input_params(struct rmi_2d_sensor *sensor) sensor->axis_align.clip_y_high); set_bit(EV_ABS, input->evbit); - input_set_abs_params(input, ABS_MT_POSITION_X, 0, sensor->max_x, - 0, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, 0, sensor->max_y, - 0, 0); + + max_x = sensor->max_x; + max_y = sensor->max_y; + if (sensor->axis_align.swap_axes) + swap(max_x, max_y); + input_set_abs_params(input, ABS_MT_POSITION_X, 0, max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_y, 0, 0); if (sensor->x_mm && sensor->y_mm) { res_x = (sensor->max_x - sensor->min_x) / sensor->x_mm; res_y = (sensor->max_y - sensor->min_y) / sensor->y_mm; + if (sensor->axis_align.swap_axes) + swap(res_x, res_y); input_abs_set_res(input, ABS_X, res_x); input_abs_set_res(input, ABS_Y, res_y); -- GitLab From da327a4b9d096a0734fc345ea96b483feafd0b4b Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 11 Jun 2018 20:15:11 +0200 Subject: [PATCH 0824/1291] IB/mlx4: Fix an error handling path in 'mlx4_ib_rereg_user_mr()' [ Upstream commit 3dc7c7badb7502ec3e3aa817a8bdd9e53aa54c52 ] Before returning -EPERM we should release some resources, as already done in the other error handling path of the function. Fixes: d8f9cc328c88 ("IB/mlx4: Mark user MR as writable if actual virtual memory is writable") Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1587cedee13e..761d3ce6a63a 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -247,8 +247,11 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { - if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) - return -EPERM; + if (ib_access_writable(mr_access_flags) && + !mmr->umem->writable) { + err = -EPERM; + goto release_mpt_entry; + } err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); -- GitLab From 69004038f01ccbccc5c99dd8a287331b77acc1b9 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 15 Jan 2018 18:33:57 +0100 Subject: [PATCH 0825/1291] drm/bridge/sii8620: fix loops in EDID fetch logic [ Upstream commit 8e627a1b1ce8feb3e1da4428b71b9b4905f04888 ] Function should constantly check if cable is connected and finish in finite time. Signed-off-by: Andrzej Hajda Tested-by: Marek Szyprowski Reviewed-by: Maciej Purski Link: https://patchwork.freedesktop.org/patch/msgid/20180115173357.31067-4-a.hajda@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/sil-sii8620.c | 31 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 5131bfb94f06..8b17a806ee6f 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -788,6 +788,7 @@ static void sii8620_burst_rx_all(struct sii8620 *ctx) static void sii8620_fetch_edid(struct sii8620 *ctx) { u8 lm_ddc, ddc_cmd, int3, cbus; + unsigned long timeout; int fetched, i; int edid_len = EDID_LENGTH; u8 *edid; @@ -837,23 +838,31 @@ static void sii8620_fetch_edid(struct sii8620 *ctx) REG_DDC_CMD, ddc_cmd | VAL_DDC_CMD_ENH_DDC_READ_NO_ACK ); - do { - int3 = sii8620_readb(ctx, REG_INTR3); + int3 = 0; + timeout = jiffies + msecs_to_jiffies(200); + for (;;) { cbus = sii8620_readb(ctx, REG_CBUS_STATUS); - - if (int3 & BIT_DDC_CMD_DONE) - break; - - if (!(cbus & BIT_CBUS_STATUS_CBUS_CONNECTED)) { + if (~cbus & BIT_CBUS_STATUS_CBUS_CONNECTED) { + kfree(edid); + edid = NULL; + goto end; + } + if (int3 & BIT_DDC_CMD_DONE) { + if (sii8620_readb(ctx, REG_DDC_DOUT_CNT) + >= FETCH_SIZE) + break; + } else { + int3 = sii8620_readb(ctx, REG_INTR3); + } + if (time_is_before_jiffies(timeout)) { + ctx->error = -ETIMEDOUT; + dev_err(ctx->dev, "timeout during EDID read\n"); kfree(edid); edid = NULL; goto end; } - } while (1); - - sii8620_readb(ctx, REG_DDC_STATUS); - while (sii8620_readb(ctx, REG_DDC_DOUT_CNT) < FETCH_SIZE) usleep_range(10, 20); + } sii8620_read_buf(ctx, REG_DDC_DATA, edid + fetched, FETCH_SIZE); if (fetched + FETCH_SIZE == EDID_LENGTH) { -- GitLab From c170373c723020696ba2eee0eb57b1407d3efe04 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Wed, 22 Nov 2017 10:08:38 +0100 Subject: [PATCH 0826/1291] drm/bridge/sii8620: fix potential buffer overflow [ Upstream commit 9378cecb1ce5d618b8aff4d65113ddcf72fc1011 ] Buffer overflow error should not occur, as mode_fixup() callback filters pixel clock value and it should never exceed 600000. However, current implementation is not obviously safe and relies on implementation of mode_fixup(). Make 'i' variable never reach unsafe value in order to avoid buffer overflow error. Reported-by: Dan Carpenter Fixes: bf1722ca ("drm/bridge/sii8620: rewrite hdmi start sequence") Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1511341718-6974-1-git-send-email-m.purski@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/sil-sii8620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 8b17a806ee6f..8a8c3ede8821 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1196,7 +1196,7 @@ static void sii8620_start_hdmi(struct sii8620 *ctx) int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3); int i; - for (i = 0; i < ARRAY_SIZE(clk_spec); ++i) + for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i) if (clk < clk_spec[i].max_clk) break; -- GitLab From c0f29f9d8aa86eec14cf12521597ad5af54e1cc7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 1 Jun 2018 14:34:33 +0300 Subject: [PATCH 0827/1291] ARC: Explicitly add -mmedium-calls to CFLAGS [ Upstream commit 74c11e300c103af47db5b658fdcf28002421e250 ] GCC built for arc*-*-linux has "-mmedium-calls" implicitly enabled by default thus we don't see any problems during Linux kernel compilation. ----------------------------->8------------------------ arc-linux-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [enabled] ----------------------------->8------------------------ But if we try to use so-called Elf32 toolchain with GCC configured for arc*-*-elf* then we'd see the following failure: ----------------------------->8------------------------ init/do_mounts.o: In function 'init_rootfs': do_mounts.c:(.init.text+0x108): relocation truncated to fit: R_ARC_S21W_PCREL against symbol 'unregister_filesystem' defined in .text section in fs/filesystems.o arc-elf32-ld: final link failed: Symbol needs debug section which does not exist make: *** [vmlinux] Error 1 ----------------------------->8------------------------ That happens because neither "-mmedium-calls" nor "-mlong-calls" are enabled in Elf32 GCC: ----------------------------->8------------------------ arc-elf32-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [disabled] ----------------------------->8------------------------ Now to make it possible to use Elf32 toolchain for building Linux kernel we're explicitly add "-mmedium-calls" to CFLAGS. And since we add "-mmedium-calls" to the global CFLAGS there's no point in having per-file copies thus removing them. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d37f49d6a27f..6c1b20dd76ad 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -16,7 +16,7 @@ endif KBUILD_DEFCONFIG := nsim_700_defconfig -cflags-y += -fno-common -pipe -fno-builtin -D__linux__ +cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs @@ -140,16 +140,3 @@ dtbs: scripts archclean: $(Q)$(MAKE) $(clean)=$(boot) - -# Hacks to enable final link due to absence of link-time branch relexation -# and gcc choosing optimal(shorter) branches at -O3 -# -# vineetg Feb 2010: -mlong-calls switched off for overall kernel build -# However lib/decompress_inflate.o (.init.text) calls -# zlib_inflate_workspacesize (.text) causing relocation errors. -# Thus forcing all exten calls in this file to be long calls -export CFLAGS_decompress_inflate.o = -mmedium-calls -export CFLAGS_initramfs.o = -mmedium-calls -ifdef CONFIG_SMP -export CFLAGS_core.o = -mmedium-calls -endif -- GitLab From 4dff89e722d5186e9531a534f1000fee41798176 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 12 Jun 2018 15:19:35 -0700 Subject: [PATCH 0828/1291] hwmon: (nct6775) Fix loop limit [ Upstream commit 91bb8f45f73f19a0150c233c0f11cdeb6d71d1e9 ] Commit cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label handling") changed a loop limit from "data->temp_label_num - 1" to "32", as part of moving from a string array to a bit mask. This results in the following error, reported by UBSAN. UBSAN: Undefined behaviour in drivers/hwmon/nct6775.c:4179:27 shift exponent 32 is too large for 32-bit type 'long unsigned int' Similar to the original loop, the limit has to be one less than the number of bits. Fixes: cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label handling") Reported-by: Paul Menzel Cc: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index f5f3f8cf57ea..5f87764d7015 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -4107,7 +4107,7 @@ static int nct6775_probe(struct platform_device *pdev) * The temperature is already monitored if the respective bit in * is set. */ - for (i = 0; i < 32; i++) { + for (i = 0; i < 31; i++) { if (!(data->temp_mask & BIT(i + 1))) continue; if (!reg_temp_alternate[i]) -- GitLab From bf3bb8b549da22221ba0f28110148e1bb6c23228 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 30 May 2018 09:30:42 +0800 Subject: [PATCH 0829/1291] soc: imx: gpcv2: correct PGC offset [ Upstream commit 3637f12faf507b0a4b8ac1e7115fc99583ab1db3 ] Correct MIPI/PCIe/USB_HSIC's PGC offset based on design RTL, the values in the Reference Manual (Rev. 1, 01/2018 and the older ones) are incorrect. The correct offset values should be as below: 0x800 ~ 0x83F: PGC for core0 of A7 platform; 0x840 ~ 0x87F: PGC for core1 of A7 platform; 0x880 ~ 0x8BF: PGC for SCU of A7 platform; 0xA00 ~ 0xA3F: PGC for fastmix/megamix; 0xC00 ~ 0xC3F: PGC for MIPI PHY; 0xC40 ~ 0xC7F: PGC for PCIe_PHY; 0xC80 ~ 0xCBF: PGC for USB OTG1 PHY; 0xCC0 ~ 0xCFF: PGC for USB OTG2 PHY; 0xD00 ~ 0xD3F: PGC for USB HSIC PHY; Signed-off-by: Anson Huang Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver") Acked-by: Andrey Smirnov Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/soc/imx/gpcv2.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index f4e3bd40c72e..6ef18cf8f243 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -39,10 +39,15 @@ #define GPC_M4_PU_PDN_FLG 0x1bc - -#define PGC_MIPI 4 -#define PGC_PCIE 5 -#define PGC_USB_HSIC 8 +/* + * The PGC offset values in Reference Manual + * (Rev. 1, 01/2018 and the older ones) GPC chapter's + * GPC_PGC memory map are incorrect, below offset + * values are from design RTL. + */ +#define PGC_MIPI 16 +#define PGC_PCIE 17 +#define PGC_USB_HSIC 20 #define GPC_PGC_CTRL(n) (0x800 + (n) * 0x40) #define GPC_PGC_SR(n) (GPC_PGC_CTRL(n) + 0xc) -- GitLab From 48168383379f2c8437059a9790d34a20fe95fe88 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 15 Jun 2017 12:57:30 +0300 Subject: [PATCH 0830/1291] usb: dwc3: pci: add support for Intel IceLake [ Upstream commit 00908693c481f7298adf8cf4d2ff3dfbea8c375f ] PCI IDs for Intel IceLake. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index bc5e91d4fac8..09c0454833ad 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -41,6 +41,7 @@ #define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e +#define PCI_DEVICE_ID_INTEL_ICLLP 0x34ee #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -273,6 +274,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICLLP), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; -- GitLab From 3572fd055e69a583eff9d7fd5cb8a7736ed27d65 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Wed, 13 Jun 2018 11:05:06 +0000 Subject: [PATCH 0831/1291] usb: gadget: ffs: Fix BUG when userland exits with submitted AIO transfers [ Upstream commit d52e4d0c0c428bf2ba35074a7495cdb28e2efbae ] This bug happens only when the UDC needs to sleep during usb_ep_dequeue, as is the case for (at least) dwc3. [ 382.200896] BUG: scheduling while atomic: screen/1808/0x00000100 [ 382.207124] 4 locks held by screen/1808: [ 382.211266] #0: (rcu_callback){....}, at: [] rcu_process_callbacks+0x260/0x440 [ 382.219949] #1: (rcu_read_lock_sched){....}, at: [] percpu_ref_switch_to_atomic_rcu+0xb0/0x130 [ 382.230034] #2: (&(&ctx->ctx_lock)->rlock){....}, at: [] free_ioctx_users+0x23/0xd0 [ 382.230096] #3: (&(&ffs->eps_lock)->rlock){....}, at: [] ffs_aio_cancel+0x20/0x60 [usb_f_fs] [ 382.230160] Modules linked in: usb_f_fs libcomposite configfs bnep btsdio bluetooth ecdh_generic brcmfmac brcmutil intel_powerclamp coretemp dwc3 kvm_intel ulpi udc_core kvm irqbypass crc32_pclmul crc32c_intel pcbc dwc3_pci aesni_intel aes_i586 crypto_simd cryptd ehci_pci ehci_hcd gpio_keys usbcore basincove_gpadc industrialio usb_common [ 382.230407] CPU: 1 PID: 1808 Comm: screen Not tainted 4.14.0-edison+ #117 [ 382.230416] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 382.230425] Call Trace: [ 382.230438] [ 382.230466] dump_stack+0x47/0x62 [ 382.230498] __schedule_bug+0x61/0x80 [ 382.230522] __schedule+0x43/0x7a0 [ 382.230587] schedule+0x5f/0x70 [ 382.230625] dwc3_gadget_ep_dequeue+0x14c/0x270 [dwc3] [ 382.230669] ? do_wait_intr_irq+0x70/0x70 [ 382.230724] usb_ep_dequeue+0x19/0x90 [udc_core] [ 382.230770] ffs_aio_cancel+0x37/0x60 [usb_f_fs] [ 382.230798] kiocb_cancel+0x31/0x40 [ 382.230822] free_ioctx_users+0x4d/0xd0 [ 382.230858] percpu_ref_switch_to_atomic_rcu+0x10a/0x130 [ 382.230881] ? percpu_ref_exit+0x40/0x40 [ 382.230904] rcu_process_callbacks+0x2b3/0x440 [ 382.230965] __do_softirq+0xf8/0x26b [ 382.231011] ? __softirqentry_text_start+0x8/0x8 [ 382.231033] do_softirq_own_stack+0x22/0x30 [ 382.231042] [ 382.231071] irq_exit+0x45/0xc0 [ 382.231089] smp_apic_timer_interrupt+0x13c/0x150 [ 382.231118] apic_timer_interrupt+0x35/0x3c [ 382.231132] EIP: __copy_user_ll+0xe2/0xf0 [ 382.231142] EFLAGS: 00210293 CPU: 1 [ 382.231154] EAX: bfd4508c EBX: 00000004 ECX: 00000003 EDX: f3d8fe50 [ 382.231165] ESI: f3d8fe51 EDI: bfd4508d EBP: f3d8fe14 ESP: f3d8fe08 [ 382.231176] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 382.231265] core_sys_select+0x25f/0x320 [ 382.231346] ? __wake_up_common_lock+0x62/0x80 [ 382.231399] ? tty_ldisc_deref+0x13/0x20 [ 382.231438] ? ldsem_up_read+0x1b/0x40 [ 382.231459] ? tty_ldisc_deref+0x13/0x20 [ 382.231479] ? tty_write+0x29f/0x2e0 [ 382.231514] ? n_tty_ioctl+0xe0/0xe0 [ 382.231541] ? tty_write_unlock+0x30/0x30 [ 382.231566] ? __vfs_write+0x22/0x110 [ 382.231604] ? security_file_permission+0x2f/0xd0 [ 382.231635] ? rw_verify_area+0xac/0x120 [ 382.231677] ? vfs_write+0x103/0x180 [ 382.231711] SyS_select+0x87/0xc0 [ 382.231739] ? SyS_write+0x42/0x90 [ 382.231781] do_fast_syscall_32+0xd6/0x1a0 [ 382.231836] entry_SYSENTER_32+0x47/0x71 [ 382.231848] EIP: 0xb7f75b05 [ 382.231857] EFLAGS: 00000246 CPU: 1 [ 382.231868] EAX: ffffffda EBX: 00000400 ECX: bfd4508c EDX: bfd4510c [ 382.231878] ESI: 00000000 EDI: 00000000 EBP: 00000000 ESP: bfd45020 [ 382.231889] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b [ 382.232281] softirq: huh, entered softirq 9 RCU c10b4d90 with preempt_count 00000100, exited with 00000000? Tested-by: Sam Protsenko Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 52e6897fa35a..17467545391b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -219,6 +219,7 @@ struct ffs_io_data { struct mm_struct *mm; struct work_struct work; + struct work_struct cancellation_work; struct usb_ep *ep; struct usb_request *req; @@ -1073,22 +1074,31 @@ ffs_epfile_open(struct inode *inode, struct file *file) return 0; } +static void ffs_aio_cancel_worker(struct work_struct *work) +{ + struct ffs_io_data *io_data = container_of(work, struct ffs_io_data, + cancellation_work); + + ENTER(); + + usb_ep_dequeue(io_data->ep, io_data->req); +} + static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; - struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + struct ffs_data *ffs = io_data->ffs; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); - - if (likely(io_data && io_data->ep && io_data->req)) - value = usb_ep_dequeue(io_data->ep, io_data->req); - else + if (likely(io_data && io_data->ep && io_data->req)) { + INIT_WORK(&io_data->cancellation_work, ffs_aio_cancel_worker); + queue_work(ffs->io_completion_wq, &io_data->cancellation_work); + value = -EINPROGRESS; + } else { value = -EINVAL; - - spin_unlock_irq(&epfile->ffs->eps_lock); + } return value; } -- GitLab From 7b2dc4515f030658f9cae3f1d7e294ab8238e39f Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 12 Jun 2018 12:37:29 +0400 Subject: [PATCH 0832/1291] usb: dwc2: gadget: Fix issue in dwc2_gadget_start_isoc() [ Upstream commit 1ffba9058737af2ddeebc813faa8ea9b16bc892a ] In case of requests queue is empty reset EP target_frame to initial value. This allow restarting ISOC traffic in case when function driver queued requests with interruptions. Tested-by: Zeng Tao Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 6ef001a83fe2..7c6aa505fb68 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -932,6 +932,7 @@ static void dwc2_gadget_start_isoc_ddma(struct dwc2_hsotg_ep *hs_ep) u32 ctrl; if (list_empty(&hs_ep->queue)) { + hs_ep->target_frame = TARGET_FRAME_INITIAL; dev_dbg(hsotg->dev, "%s: No requests in queue\n", __func__); return; } -- GitLab From 987a7dab8308be0636b020444bd42910abf68dbc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 31 May 2018 16:45:52 +0200 Subject: [PATCH 0833/1291] usb: dwc3: of-simple: fix use-after-free on remove [ Upstream commit 896e518883f18e601335908192e33426c1f599a4 ] The clocks have already been explicitly disabled and put as part of remove() so the runtime suspend callback must not be run when balancing the runtime PM usage count before returning. Fixes: 16adc674d0d6 ("usb: dwc3: add generic OF glue layer") Signed-off-by: Johan Hovold Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-of-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index fbfc09ebd2ec..acf41ba3638d 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -132,8 +132,9 @@ static int dwc3_of_simple_remove(struct platform_device *pdev) of_platform_depopulate(dev); - pm_runtime_put_sync(dev); pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + pm_runtime_set_suspended(dev); return 0; } -- GitLab From dcf6f268deee44671fa9b320b1ba5e9648a5277c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 18 Jun 2018 14:17:16 +0300 Subject: [PATCH 0834/1291] ACPI / EC: Use ec_no_wakeup on Thinkpad X1 Carbon 6th [ Upstream commit 8195a655e5ce09550aff81b2573d9b015d520cb9 ] On this system EC interrupt triggers constantly kicking devices out of low power states and thus blocking power management. The system also has a PCIe root port hosting Alpine Ridge Thunderbolt controller and it never gets a chance to go to D3cold because of this. Since the power button works the same regardless if EC interrupt is enabled or not during s2idle, add a quirk for this machine that sets ec_no_wakeup=true preventing spurious wakeups. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 58bc28aff3aa..355add64681b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2029,6 +2029,17 @@ static inline void acpi_ec_query_exit(void) } } +static const struct dmi_system_id acpi_ec_no_wakeup[] = { + { + .ident = "Thinkpad X1 Carbon 6th", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20KGS3JF01"), + }, + }, + { }, +}; + int __init acpi_ec_init(void) { int result; @@ -2039,6 +2050,15 @@ int __init acpi_ec_init(void) if (result) return result; + /* + * Disable EC wakeup on following systems to prevent periodic + * wakeup from EC GPE. + */ + if (dmi_check_system(acpi_ec_no_wakeup)) { + ec_no_wakeup = true; + pr_debug("Disabling EC wakeup on suspend-to-idle\n"); + } + /* Drivers must be started after acpi_ec_query_init() */ dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); /* -- GitLab From 69c471908ddd7b9927aa0735766d486cbf094cab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 10:11:56 -0700 Subject: [PATCH 0835/1291] netfilter: ipv6: nf_defrag: reduce struct net memory waste [ Upstream commit 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 ] It is a waste of memory to use a full "struct netns_sysctl_ipv6" while only one pointer is really used, considering netns_sysctl_ipv6 keeps growing. Also, since "struct netns_frags" has cache line alignment, it is better to move the frags_hdr pointer outside, otherwise we spend a full cache line for this pointer. This saves 192 bytes of memory per netns. Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/net_namespace.h | 1 + include/net/netns/ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 049008493faf..f4bf75fac349 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -120,6 +120,7 @@ struct net { #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; + struct ctl_table_header *nf_frag_frags_hdr; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index dc825a5ddd7f..c004d051c2d3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -94,7 +94,6 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_sysctl_ipv6 sysctl; struct netns_frags frags; }; #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 722a9db8c6a7..ee33a6743f3b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -117,7 +117,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->nf_frag.sysctl.frags_hdr = hdr; + net->nf_frag_frags_hdr = hdr; return 0; err_reg: @@ -131,8 +131,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { struct ctl_table *table; - table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + table = net->nf_frag_frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } -- GitLab From 175b38277b6d9215ef9c854d666b6223873445e1 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 13 Jun 2018 12:26:13 +0800 Subject: [PATCH 0836/1291] netfilter: nf_ct_helper: Fix possible panic after nf_conntrack_helper_unregister [ Upstream commit ad9852af97587b8abe8102f9ddcb05c9769656f6 ] The helper module would be unloaded after nf_conntrack_helper_unregister, so it may cause a possible panic caused by race. nf_ct_iterate_destroy(unhelp, me) reset the helper of conntrack as NULL, but maybe someone has gotten the helper pointer during this period. Then it would panic, when it accesses the helper and the module was unloaded. Take an example as following: CPU0 CPU1 ctnetlink_dump_helpinfo helper = rcu_dereference(help->helper); unhelp set helper as NULL unload helper module helper->to_nlattr(skb, ct); As above, the cpu0 tries to access the helper and its module is unloaded, then the panic happens. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 551a1eddf0fa..a75b11c39312 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* Maybe someone has gotten the helper already when unhelp above. + * So need to wait it. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); -- GitLab From 81810e45384b41c2669edb4adb9cacd65719222a Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 16:46:03 -0600 Subject: [PATCH 0837/1291] selftests: pstore: return Kselftest Skip code for skipped tests [ Upstream commit 856e7c4b619af622d56b3b454f7bec32a170ac99 ] When pstore_post_reboot test gets skipped because of unmet dependencies and/or unsupported configuration, it returns 0 which is treated as a pass by the Kselftest framework. This leads to false positive result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Kees Cook Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/pstore/pstore_post_reboot_tests | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests index 6ccb154cb4aa..22f8df1ad7d4 100755 --- a/tools/testing/selftests/pstore/pstore_post_reboot_tests +++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests @@ -7,13 +7,16 @@ # # Released under the terms of the GPL v2. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./common_tests if [ -e $REBOOT_FLAG ]; then rm $REBOOT_FLAG else prlog "pstore_crash_test has not been executed yet. we skip further tests." - exit 0 + exit $ksft_skip fi prlog -n "Mounting pstore filesystem ... " -- GitLab From bc5e458a9ee234b5454e91ee56dc1fb870f9437f Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 17:40:31 -0600 Subject: [PATCH 0838/1291] selftests: static_keys: return Kselftest Skip code for skipped tests [ Upstream commit 8781578087b8fb8829558bac96c3c24e5ba26f82 ] When static_keys test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Added an explicit searches for test_static_key_base and test_static_keys modules and return skip code if they aren't found to differentiate between the failure to load the module condition and module not found condition. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../selftests/static_keys/test_static_keys.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh index 24cff498b31a..fc9f8cde7d42 100755 --- a/tools/testing/selftests/static_keys/test_static_keys.sh +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -2,6 +2,19 @@ # SPDX-License-Identifier: GPL-2.0 # Runs static keys kernel module tests +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_static_key_base; then + echo "static_key: module test_static_key_base is not found [SKIP]" + exit $ksft_skip +fi + +if ! /sbin/modprobe -q -n test_static_keys; then + echo "static_key: module test_static_keys is not found [SKIP]" + exit $ksft_skip +fi + if /sbin/modprobe -q test_static_key_base; then if /sbin/modprobe -q test_static_keys; then echo "static_key: ok" -- GitLab From d5f9d1d350bc452c1fb5f4a184ccc6dd74a2946d Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 18:11:37 -0600 Subject: [PATCH 0839/1291] selftests: sysctl: return Kselftest Skip code for skipped tests [ Upstream commit c7db6ffb831fd36a03485a0d88b1e505378975ad ] When sysctl test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Changed return code to kselftest skip code in skip error legs that check requirements and module probe test error leg. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Kees Cook Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/sysctl/sysctl.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index ec232c3cfcaa..584eb8ea780a 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -14,6 +14,9 @@ # This performs a series tests against the proc sysctl interface. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + TEST_NAME="sysctl" TEST_DRIVER="test_${TEST_NAME}" TEST_DIR=$(dirname $0) @@ -41,7 +44,7 @@ test_modprobe() echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip fi } @@ -98,28 +101,30 @@ test_reqs() uid=$(id -u) if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi if ! which perl 2> /dev/null > /dev/null; then echo "$0: You need perl installed" - exit 1 + exit $ksft_skip fi if ! which getconf 2> /dev/null > /dev/null; then echo "$0: You need getconf installed" - exit 1 + exit $ksft_skip fi if ! which diff 2> /dev/null > /dev/null; then echo "$0: You need diff installed" - exit 1 + exit $ksft_skip fi } function load_req_mod() { - trap "test_modprobe" EXIT - if [ ! -d $DIR ]; then + if ! modprobe -q -n $TEST_DRIVER; then + echo "$0: module $TEST_DRIVER not found [SKIP]" + exit $ksft_skip + fi modprobe $TEST_DRIVER if [ $? -ne 0 ]; then exit @@ -765,6 +770,7 @@ function parse_args() test_reqs allow_user_defaults check_production_sysctl_writes_strict +test_modprobe load_req_mod trap "test_finish" EXIT -- GitLab From 41325fa657b96b4c33fee8c7c63e00175fab538a Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 21:10:48 -0600 Subject: [PATCH 0840/1291] selftests: user: return Kselftest Skip code for skipped tests [ Upstream commit d7d5311d4aa9611fe1a5a851e6f75733237a668a ] When user test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Add an explicit check for module presence and return skip code if module isn't present. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/user/test_user_copy.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh index d60506fc77f8..f9b31a57439b 100755 --- a/tools/testing/selftests/user/test_user_copy.sh +++ b/tools/testing/selftests/user/test_user_copy.sh @@ -2,6 +2,13 @@ # SPDX-License-Identifier: GPL-2.0 # Runs copy_to/from_user infrastructure using test_user_copy kernel module +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_user_copy; then + echo "user: module test_user_copy is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test_user_copy; then /sbin/modprobe -q -r test_user_copy echo "user_copy: ok" -- GitLab From 3a9907ebf83b379b15a44a2597066868d1c958e1 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 14 Jun 2018 16:56:13 -0600 Subject: [PATCH 0841/1291] selftests: zram: return Kselftest Skip code for skipped tests [ Upstream commit 685814466bf8398192cf855415a0bb2cefc1930e ] When zram test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/zram/zram.sh | 5 ++++- tools/testing/selftests/zram/zram_lib.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 754de7da426a..232e958ec454 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./zram_lib.sh run_zram () { @@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then else echo "$TCID : No zram.ko module or /dev/zram0 device file not found" echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + exit $ksft_skip fi diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f6a9c73e7a44..9e73a4fb9b0a 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -18,6 +18,9 @@ MODULE=0 dev_makeswap=-1 dev_mounted=-1 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + trap INT check_prereqs() @@ -27,7 +30,7 @@ check_prereqs() if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } -- GitLab From a626c95346e1dfb93993975900088c4908a1a327 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 21:31:43 -0600 Subject: [PATCH 0842/1291] selftests: vm: return Kselftest Skip code for skipped tests [ Upstream commit a4d7537789724985cafbc9260a31ca4f2b7cf123 ] When vm test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Acked-by: Mike Rapoport Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/compaction_test.c | 4 +++- tools/testing/selftests/vm/mlock2-tests.c | 12 +++++++----- tools/testing/selftests/vm/run_vmtests | 5 ++++- tools/testing/selftests/vm/userfaultfd.c | 4 +++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 1097f04e4d80..bcec71250873 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -16,6 +16,8 @@ #include #include +#include "../kselftest.h" + #define MAP_SIZE 1048576 struct map_list { @@ -169,7 +171,7 @@ int main(int argc, char **argv) printf("Either the sysctl compact_unevictable_allowed is not\n" "set to 1 or couldn't read the proc file.\n" "Skipping the test\n"); - return 0; + return KSFT_SKIP; } lim.rlim_cur = RLIM_INFINITY; diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index 4997b9222cfa..637b6d0ac0d0 100644 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -9,6 +9,8 @@ #include #include "mlock2.h" +#include "../kselftest.h" + struct vm_boundaries { unsigned long start; unsigned long end; @@ -303,7 +305,7 @@ static int test_mlock_lock() if (mlock2_(map, 2 * page_size, 0)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(0)"); goto unmap; @@ -412,7 +414,7 @@ static int test_mlock_onfault() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -425,7 +427,7 @@ static int test_mlock_onfault() if (munlock(map, 2 * page_size)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("munlock()"); goto unmap; @@ -457,7 +459,7 @@ static int test_lock_onfault_of_present() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock) if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock(ONFAULT)\n"); goto out; diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 45708aa3ce47..57ab6ac0d4a4 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 #please run as root +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + mnt=./huge exitcode=0 @@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages if [ $? -ne 0 ]; then echo "Please run this test as root" - exit 1 + exit $ksft_skip fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index de2f9ec8a87f..7b8171e3128a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -69,6 +69,8 @@ #include #include +#include "../kselftest.h" + #ifdef __NR_userfaultfd static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; @@ -1322,7 +1324,7 @@ int main(int argc, char **argv) int main(void) { printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return 0; + return KSFT_SKIP; } #endif /* __NR_userfaultfd */ -- GitLab From 0104d49dc06f196a0102290afc783dc04975187c Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Thu, 14 Jun 2018 11:57:08 +0200 Subject: [PATCH 0843/1291] selftests: sync: add config fragment for testing sync framework [ Upstream commit d6a3e55131fcb1e5ca1753f4b6f297a177b2fc91 ] Unless the software synchronization objects (CONFIG_SW_SYNC) is enabled, the sync test will be skipped: TAP version 13 1..0 # Skipped: Sync framework not supported by kernel Add a config fragment file to be able to run "make kselftest-merge" to enable relevant configuration required in order to run the sync test. Signed-off-by: Fathi Boudra Link: https://lkml.org/lkml/2017/5/5/14 Signed-off-by: Anders Roxell Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/sync/config | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/testing/selftests/sync/config diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config new file mode 100644 index 000000000000..1ab7e8130db2 --- /dev/null +++ b/tools/testing/selftests/sync/config @@ -0,0 +1,4 @@ +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y -- GitLab From 7ab23e15c7d209bb8ae9a265e9ea397fde9957a7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:47:12 -0700 Subject: [PATCH 0844/1291] ARM: dts: NSP: Fix i2c controller interrupt type [ Upstream commit a3e32e78a40017756c71ef6dad429ffe3301126a ] The i2c controller should use IRQ_TYPE_LEVEL_HIGH instead of IRQ_TYPE_NONE. Fixes: 0f9f27a36d09 ("ARM: dts: NSP: Add I2C support to the DT") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-nsp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index d5f5e92e7488..394e8806110c 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -391,7 +391,7 @@ i2c0: i2c@38000 { reg = <0x38000 0x50>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; dma-coherent; status = "disabled"; -- GitLab From d8ff67605d54ff6153fbd045d6d5cd821396a606 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:47:13 -0700 Subject: [PATCH 0845/1291] ARM: dts: NSP: Fix PCIe controllers interrupt types [ Upstream commit 403fde644855bc71318c8db65646383e22653b13 ] The interrupts for the PCIe controllers should all be of type IRQ_TYPE_LEVEL_HIGH instead of IRQ_TYPE_NONE. Fixes: d71eb9412088 ("ARM: dts: NSP: Add MSI support on PCI") Fixes: 522199029fdc ("ARM: dts: NSP: Fix PCIE DT issue") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-nsp.dtsi | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 394e8806110c..1792192001a2 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -496,7 +496,7 @@ pcie0: pcie@18012000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -519,10 +519,10 @@ msi0: msi-controller { compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; @@ -533,7 +533,7 @@ pcie1: pcie@18013000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <1>; @@ -556,10 +556,10 @@ msi1: msi-controller { compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; @@ -570,7 +570,7 @@ pcie2: pcie@18014000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <2>; @@ -593,10 +593,10 @@ msi2: msi-controller { compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; -- GitLab From 2d4ac6f1837b874c475005fc29335c4f1d55faa2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:53:40 -0700 Subject: [PATCH 0846/1291] ARM: dts: BCM5301x: Fix i2c controller interrupt type [ Upstream commit a0a8338e905734518ab9b10b06e7fd0201228f8b ] The i2c controller should be using IRQ_TYPE_LEVEL_HIGH, fix that. Fixes: bb097e3e0045 ("ARM: dts: BCM5301X: Add I2C support to the DT") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm5301x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 045b9bb857f9..501877e87a5b 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -365,7 +365,7 @@ mdio: mdio@18003000 { i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; -- GitLab From 6542fcfbd4c5a82d05c1653e3ba9583971536121 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:27 -0700 Subject: [PATCH 0847/1291] ARM: dts: Cygnus: Fix I2C controller interrupt type [ Upstream commit 71ca3409703b62b6a092d0d9d13f366c121bc5d3 ] Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Cygnus SoC. Fixes: b51c05a331ff ("ARM: dts: add I2C device nodes for Broadcom Cygnus") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-cygnus.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 9a9902974b1b..4a80f5e9c8b0 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -216,7 +216,7 @@ i2c0: i2c@18008000 { reg = <0x18008000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -245,7 +245,7 @@ i2c1: i2c@1800b000 { reg = <0x1800b000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; -- GitLab From eab85e8bd4866595f3ab18f96dee6c7c02699313 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:28 -0700 Subject: [PATCH 0848/1291] ARM: dts: Cygnus: Fix PCIe controller interrupt type [ Upstream commit 6cb1628ad3506b315cdddd7676db0ff2af378d28 ] Fix PCIe controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Cygnus SoC Fixes: cd590b50a936 ("ARM: dts: enable PCIe support for Cygnus") Fixes: f6b889358a82 ("ARM: dts: Enable MSI support for Broadcom Cygnus") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-cygnus.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 4a80f5e9c8b0..8b2c65cd61a2 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -256,7 +256,7 @@ pcie0: pcie@18012000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -278,10 +278,10 @@ msi0: msi-controller { compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; }; @@ -291,7 +291,7 @@ pcie1: pcie@18013000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <1>; @@ -313,10 +313,10 @@ msi1: msi-controller { compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; }; -- GitLab From 31b16a0302be55e0915b270987c1429ee919313b Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Fri, 18 May 2018 08:21:20 -0700 Subject: [PATCH 0849/1291] arm64: dts: specify 1.8V EMMC capabilities for bcm958742k [ Upstream commit eba92503e980c08ac353d0d669d0bb143979abcd ] Specify 1.8V EMMC capabilities for bcm958742k board to indicate support for UHS mode. Fixes: d4b4aba6be8a ("arm64: dts: Initial DTS files for Broadcom Stingray SOC") Signed-off-by: Scott Branden Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts b/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts index eb6f08cdbd79..77efa28c4dd5 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts +++ b/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts @@ -43,6 +43,10 @@ &gphy0 { enet-phy-lane-swap; }; +&sdio0 { + mmc-ddr-1_8v; +}; + &uart2 { status = "okay"; }; -- GitLab From 69347a9150c9b8a053fd791aaaec92fd661c4f0f Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Tue, 22 May 2018 10:01:39 -0700 Subject: [PATCH 0850/1291] arm64: dts: specify 1.8V EMMC capabilities for bcm958742t [ Upstream commit 37c2bd81a86ebb1cc934bf52a29c33d6f9abff7f ] Specify 1.8V EMMC capabilities for bcm958742t board to indicate support for UHS mode. Fixes: d4b4aba6be8a ("arm64: dts: Initial DTS files for Broadcom Stingray SOC") Signed-off-by: Scott Branden Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts b/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts index 5084b037320f..55ba495ef56e 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts +++ b/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts @@ -42,3 +42,7 @@ / { &gphy0 { enet-phy-lane-swap; }; + +&sdio0 { + mmc-ddr-1_8v; +}; -- GitLab From db0b1a4b0772b52f7707269a7c6dc9d636d5f47c Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:29 -0700 Subject: [PATCH 0851/1291] arm64: dts: ns2: Fix I2C controller interrupt type [ Upstream commit e605c287deed45624e8d35a15e3f0b4faab1a62d ] Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom NS2 SoC. Fixes: 7ac674e8df7a ("arm64: dts: Add I2C nodes for NS2") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 35c8457e3d1f..8124495558be 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -566,7 +566,7 @@ i2c0: i2c@66080000 { reg = <0x66080000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -594,7 +594,7 @@ i2c1: i2c@660b0000 { reg = <0x660b0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; -- GitLab From 22dc4f2b8b46fb10d6139b3d23cba2a48bf892b9 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:30 -0700 Subject: [PATCH 0852/1291] arm64: dts: ns2: Fix PCIe controller interrupt type [ Upstream commit d0b8aed9e80ab526dbb04020bfc94ecea7bddb44 ] Fix PCIe controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom NS2 SoC. Fixes: fd5e5dd56a2f ("arm64: dts: Add PCIe0 and PCIe4 DT nodes for NS2") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 8124495558be..0b72094bcf5a 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -118,7 +118,7 @@ pcie0: pcie@20020000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 281 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -149,7 +149,7 @@ pcie4: pcie@50020000 { #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 305 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <4>; -- GitLab From fec2c747ae39db840c68dd3288cc10c144a55eec Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:31 -0700 Subject: [PATCH 0853/1291] arm64: dts: Stingray: Fix I2C controller interrupt type [ Upstream commit 75af23c4736c5633894ea0baf9bca1cf6b248ca4 ] Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Stingray SoC. Fixes: 1256ea18875d ("arm64: dts: Add I2C DT nodes for Stingray SoC") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index e6f75c633623..2b76293b51c8 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -409,7 +409,7 @@ i2c0: i2c@000b0000 { reg = <0x000b0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -453,7 +453,7 @@ i2c1: i2c@000e0000 { reg = <0x000e0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; -- GitLab From 997a6b6b3017a914b0707676119848c1af69921d Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Thu, 7 Jun 2018 14:35:01 +0100 Subject: [PATCH 0854/1291] drivers/perf: xgene_pmu: Fix IOB SLOW PMU parser error [ Upstream commit a45fc268db20ecd859bb61e25045912b3194b5e6 ] This patch fixes the below parser error of the IOB SLOW PMU. # perf stat -a -e iob-slow0/cycle-count/ sleep 1 evenf syntax error: 'iob-slow0/cycle-count/' \___ parser error It replaces the "-" character by "_" character inside the PMU name. Signed-off-by: Hoan Tran Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/perf/xgene_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index eb23311bc70c..8b79c2f7931f 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1463,7 +1463,7 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) case PMU_TYPE_IOB: return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id); case PMU_TYPE_IOB_SLOW: - return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id); + return devm_kasprintf(dev, GFP_KERNEL, "iob_slow%d", id); case PMU_TYPE_MCB: return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id); case PMU_TYPE_MC: -- GitLab From 0a43cd3ea76ad9b438089c7c9fab4ea98a94ef39 Mon Sep 17 00:00:00 2001 From: Alison Wang Date: Tue, 24 Apr 2018 10:42:32 +0800 Subject: [PATCH 0855/1291] drm: mali-dp: Enable Global SE interrupts mask for DP500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 89610dc2c235e7b02bb9fba0ce247e12d4dde7cd ] In the situation that DE and SE aren’t shared the same interrupt number, the Global SE interrupts mask bit MASK_IRQ_EN in MASKIRQ must be set, or else other mask bits will not work and no SE interrupt will occur. This patch enables MASK_IRQ_EN for SE to fix this problem. Signed-off-by: Alison Wang Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/arm/malidp_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 17bca99e8ac8..7e2c341dfe5f 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -634,7 +634,8 @@ const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = { .vsync_irq = MALIDP500_DE_IRQ_VSYNC, }, .se_irq_map = { - .irq_mask = MALIDP500_SE_IRQ_CONF_MODE, + .irq_mask = MALIDP500_SE_IRQ_CONF_MODE | + MALIDP500_SE_IRQ_GLOBAL, .vsync_irq = 0, }, .dc_irq_map = { -- GitLab From 6bcd5cdf3a36e3adf211c0f90fc83a5f0b4d83d2 Mon Sep 17 00:00:00 2001 From: Ayan Kumar Halder Date: Tue, 10 Apr 2018 19:25:03 +0100 Subject: [PATCH 0856/1291] drm/arm/malidp: Preserve LAYER_FORMAT contents when setting format [ Upstream commit ad7fda2e378f4356df621a39655f7c200b495d81 ] On some Mali-DP processors, the LAYER_FORMAT register contains fields other than the format. These bits were unconditionally cleared when setting the pixel format, whereas they should be preserved at their reset values. Reported-by: Brian Starkey Reported-by: Liviu Dudau Signed-off-by: Ayan Kumar halder Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/arm/malidp_planes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 94e7e3fa3408..16b8b310ae5c 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -23,6 +23,7 @@ /* Layer specific register offsets */ #define MALIDP_LAYER_FORMAT 0x000 +#define LAYER_FORMAT_MASK 0x3f #define MALIDP_LAYER_CONTROL 0x004 #define LAYER_ENABLE (1 << 0) #define LAYER_FLOWCFG_MASK 7 @@ -278,7 +279,9 @@ static void malidp_de_plane_update(struct drm_plane *plane, dest_w = plane->state->crtc_w; dest_h = plane->state->crtc_h; - malidp_hw_write(mp->hwdev, ms->format, mp->layer->base); + val = malidp_hw_read(mp->hwdev, mp->layer->base); + val = (val & ~LAYER_FORMAT_MASK) | ms->format; + malidp_hw_write(mp->hwdev, val, mp->layer->base); for (i = 0; i < ms->n_planes; i++) { /* calculate the offset for the layer's plane registers */ -- GitLab From ed4afe79ba5a2f6761e10b1a91920796d3164ef9 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 12 Jun 2018 18:16:05 -0700 Subject: [PATCH 0857/1291] IB/rxe: Fix missing completion for mem_reg work requests [ Upstream commit 375dc53d032fc11e98036b5f228ad13f7c5933f5 ] Run the completer task to post a work completion after processing a memory registration or invalidate work request. This covers the case where the memory registration or invalidate was the last work request posted to the qp. Signed-off-by: Vijay Immanuel Reviewed-by: Yonatan Cohen Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_req.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 54cc9cb1e3b7..de853bcc2384 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -645,6 +645,9 @@ int rxe_requester(void *arg) } else { goto exit; } + if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || + qp->sq_sig_type == IB_SIGNAL_ALL_WR) + rxe_run_task(&qp->comp.task, 1); qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); goto next_wqe; -- GitLab From 7ccd2c1be366d249e7c67997dbc930d5be0879b1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Jun 2018 18:26:33 +0800 Subject: [PATCH 0858/1291] libahci: Fix possible Spectre-v1 pmp indexing in ahci_led_store() [ Upstream commit fae2a63737e5973f1426bc139935a0f42e232844 ] Currently smatch warns of possible Spectre-V1 issue in ahci_led_store(): drivers/ata/libahci.c:1150 ahci_led_store() warn: potential spectre issue 'pp->em_priv' (local cap) Userspace controls @pmp from following callchain: em_message->store() ->ata_scsi_em_message_store() -->ap->ops->em_store() --->ahci_led_store() After the mask+shift @pmp is effectively an 8b value, which is used to index into an array of length 8, so sanitize the array index. Signed-off-by: John Garry Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 5ae268b8514e..bc562fd2b0a0 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -1135,10 +1136,12 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, /* get the slot number from the message */ pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; - if (pmp < EM_MAX_SLOTS) + if (pmp < EM_MAX_SLOTS) { + pmp = array_index_nospec(pmp, EM_MAX_SLOTS); emp = &pp->em_priv[pmp]; - else + } else { return -EINVAL; + } /* mask off the activity bits if we are in sw_activity * mode, user should turn off sw_activity before setting -- GitLab From 6a38c8a49a9f95e24c80e5ec5568a27c2cb01ed0 Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 11 May 2018 17:46:31 +0800 Subject: [PATCH 0859/1291] usb: dwc2: alloc dma aligned buffer for isoc split in [ Upstream commit af424a410749ed7e0c2bffd3cedbc7c274d0ff6f ] The commit 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way") rips out a lot of code to simply the allocation of aligned DMA. However, it also introduces a new issue when use isoc split in transfer. In my test case, I connect the dwc2 controller with an usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics headset) into the downstream port of Hub. Then use the usb mic to record, we can find noise when playback. It's because that the usb Hub uses an MDATA for the first transaction and a DATA0 for the second transaction for the isoc split in transaction. An typical isoc split in transaction sequence like this: - SSPLIT IN transaction - CSPLIT IN transaction - MDATA packet - CSPLIT IN transaction - DATA0 packet The DMA address of MDATA (urb->dma) is always DWORD-aligned, but the DMA address of DATA0 (urb->dma + qtd->isoc_split_offset) may not be DWORD-aligned, it depends on the qtd->isoc_split_offset (the length of MDATA). In my test case, the length of MDATA is usually unaligned, this cause DATA0 packet transmission error. This patch use kmem_cache to allocate aligned DMA buf for isoc split in transaction. Note that according to usb 2.0 spec, the maximum data payload size is 1023 bytes for each fs isoc ep, and the maximum allowable interrupt data payload size is 64 bytes or less for fs interrupt ep. So we set the size of object to be 1024 bytes in the kmem cache. Tested-by: Gevorg Sahakyan Tested-by: Heiko Stuebner Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Reviewed-by: Douglas Anderson Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 3 ++ drivers/usb/dwc2/hcd.c | 89 ++++++++++++++++++++++++++++++++++-- drivers/usb/dwc2/hcd.h | 8 ++++ drivers/usb/dwc2/hcd_intr.c | 8 ++++ drivers/usb/dwc2/hcd_queue.c | 3 ++ 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index ec965ac5f1f5..3c0d386dc62f 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -872,6 +872,7 @@ struct dwc2_hregs_backup { * @frame_list_sz: Frame list size * @desc_gen_cache: Kmem cache for generic descriptors * @desc_hsisoc_cache: Kmem cache for hs isochronous descriptors + * @unaligned_cache: Kmem cache for DMA mode to handle non-aligned buf * * These are for peripheral mode: * @@ -1004,6 +1005,8 @@ struct dwc2_hsotg { u32 frame_list_sz; struct kmem_cache *desc_gen_cache; struct kmem_cache *desc_hsisoc_cache; + struct kmem_cache *unaligned_cache; +#define DWC2_KMEM_UNALIGNED_BUF_SIZE 1024 #ifdef DEBUG u32 frrem_samples; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 46d3b0fc00c5..fa20ec43a187 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -1544,11 +1544,20 @@ static void dwc2_hc_start_transfer(struct dwc2_hsotg *hsotg, } if (hsotg->params.host_dma) { - dwc2_writel((u32)chan->xfer_dma, - hsotg->regs + HCDMA(chan->hc_num)); + dma_addr_t dma_addr; + + if (chan->align_buf) { + if (dbg_hc(chan)) + dev_vdbg(hsotg->dev, "align_buf\n"); + dma_addr = chan->align_buf; + } else { + dma_addr = chan->xfer_dma; + } + dwc2_writel((u32)dma_addr, hsotg->regs + HCDMA(chan->hc_num)); + if (dbg_hc(chan)) dev_vdbg(hsotg->dev, "Wrote %08lx to HCDMA(%d)\n", - (unsigned long)chan->xfer_dma, chan->hc_num); + (unsigned long)dma_addr, chan->hc_num); } /* Start the split */ @@ -2604,6 +2613,35 @@ static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg, } } +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg, + struct dwc2_qh *qh, + struct dwc2_host_chan *chan) +{ + if (!hsotg->unaligned_cache || + chan->max_packet > DWC2_KMEM_UNALIGNED_BUF_SIZE) + return -ENOMEM; + + if (!qh->dw_align_buf) { + qh->dw_align_buf = kmem_cache_alloc(hsotg->unaligned_cache, + GFP_ATOMIC | GFP_DMA); + if (!qh->dw_align_buf) + return -ENOMEM; + } + + qh->dw_align_buf_dma = dma_map_single(hsotg->dev, qh->dw_align_buf, + DWC2_KMEM_UNALIGNED_BUF_SIZE, + DMA_FROM_DEVICE); + + if (dma_mapping_error(hsotg->dev, qh->dw_align_buf_dma)) { + dev_err(hsotg->dev, "can't map align_buf\n"); + chan->align_buf = 0; + return -EINVAL; + } + + chan->align_buf = qh->dw_align_buf_dma; + return 0; +} + #define DWC2_USB_DMA_ALIGN 4 static void dwc2_free_dma_aligned_buffer(struct urb *urb) @@ -2783,6 +2821,32 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) /* Set the transfer attributes */ dwc2_hc_init_xfer(hsotg, chan, qtd); + /* For non-dword aligned buffers */ + if (hsotg->params.host_dma && qh->do_split && + chan->ep_is_in && (chan->xfer_dma & 0x3)) { + dev_vdbg(hsotg->dev, "Non-aligned buffer\n"); + if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) { + dev_err(hsotg->dev, + "Failed to allocate memory to handle non-aligned buffer\n"); + /* Add channel back to free list */ + chan->align_buf = 0; + chan->multi_count = 0; + list_add_tail(&chan->hc_list_entry, + &hsotg->free_hc_list); + qtd->in_process = 0; + qh->channel = NULL; + return -ENOMEM; + } + } else { + /* + * We assume that DMA is always aligned in non-split + * case or split out case. Warn if not. + */ + WARN_ON_ONCE(hsotg->params.host_dma && + (chan->xfer_dma & 0x3)); + chan->align_buf = 0; + } + if (chan->ep_type == USB_ENDPOINT_XFER_INT || chan->ep_type == USB_ENDPOINT_XFER_ISOC) /* @@ -5277,6 +5341,19 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) } } + if (hsotg->params.host_dma) { + /* + * Create kmem caches to handle non-aligned buffer + * in Buffer DMA mode. + */ + hsotg->unaligned_cache = kmem_cache_create("dwc2-unaligned-dma", + DWC2_KMEM_UNALIGNED_BUF_SIZE, 4, + SLAB_CACHE_DMA, NULL); + if (!hsotg->unaligned_cache) + dev_err(hsotg->dev, + "unable to create dwc2 unaligned cache\n"); + } + hsotg->otg_port = 1; hsotg->frame_list = NULL; hsotg->frame_list_dma = 0; @@ -5311,8 +5388,9 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) return 0; error4: - kmem_cache_destroy(hsotg->desc_gen_cache); + kmem_cache_destroy(hsotg->unaligned_cache); kmem_cache_destroy(hsotg->desc_hsisoc_cache); + kmem_cache_destroy(hsotg->desc_gen_cache); error3: dwc2_hcd_release(hsotg); error2: @@ -5353,8 +5431,9 @@ void dwc2_hcd_remove(struct dwc2_hsotg *hsotg) usb_remove_hcd(hcd); hsotg->priv = NULL; - kmem_cache_destroy(hsotg->desc_gen_cache); + kmem_cache_destroy(hsotg->unaligned_cache); kmem_cache_destroy(hsotg->desc_hsisoc_cache); + kmem_cache_destroy(hsotg->desc_gen_cache); dwc2_hcd_release(hsotg); usb_put_hcd(hcd); diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h index 11c3c145b793..461bdc67df6f 100644 --- a/drivers/usb/dwc2/hcd.h +++ b/drivers/usb/dwc2/hcd.h @@ -75,6 +75,8 @@ struct dwc2_qh; * (micro)frame * @xfer_buf: Pointer to current transfer buffer position * @xfer_dma: DMA address of xfer_buf + * @align_buf: In Buffer DMA mode this will be used if xfer_buf is not + * DWORD aligned * @xfer_len: Total number of bytes to transfer * @xfer_count: Number of bytes transferred so far * @start_pkt_count: Packet count at start of transfer @@ -132,6 +134,7 @@ struct dwc2_host_chan { u8 *xfer_buf; dma_addr_t xfer_dma; + dma_addr_t align_buf; u32 xfer_len; u32 xfer_count; u16 start_pkt_count; @@ -302,6 +305,9 @@ struct dwc2_hs_transfer_time { * is tightly packed. * @ls_duration_us: Duration on the low speed bus schedule. * @ntd: Actual number of transfer descriptors in a list + * @dw_align_buf: Used instead of original buffer if its physical address + * is not dword-aligned + * @dw_align_buf_dma: DMA address for dw_align_buf * @qtd_list: List of QTDs for this QH * @channel: Host channel currently processing transfers for this QH * @qh_list_entry: Entry for QH in either the periodic or non-periodic @@ -345,6 +351,8 @@ struct dwc2_qh { struct dwc2_hs_transfer_time hs_transfers[DWC2_HS_SCHEDULE_UFRAMES]; u32 ls_start_schedule_slice; u16 ntd; + u8 *dw_align_buf; + dma_addr_t dw_align_buf_dma; struct list_head qtd_list; struct dwc2_host_chan *channel; struct list_head qh_list_entry; diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index 28a8210710b1..5f72140ff824 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -931,6 +931,14 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg, frame_desc->actual_length += len; + if (chan->align_buf) { + dev_vdbg(hsotg->dev, "non-aligned buffer\n"); + dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma, + DWC2_KMEM_UNALIGNED_BUF_SIZE, DMA_FROM_DEVICE); + memcpy(qtd->urb->buf + (chan->xfer_dma - qtd->urb->dma), + chan->qh->dw_align_buf, len); + } + qtd->isoc_split_offset += len; hctsiz = dwc2_readl(hsotg->regs + HCTSIZ(chnum)); diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 7f51a77bc5cc..56e61220efc6 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -1632,6 +1632,9 @@ void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) if (qh->desc_list) dwc2_hcd_qh_free_ddma(hsotg, qh); + else if (hsotg->unaligned_cache && qh->dw_align_buf) + kmem_cache_free(hsotg->unaligned_cache, qh->dw_align_buf); + kfree(qh); } -- GitLab From a54f59199a81faec807e23fa265ba8f5e9f725a7 Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 11 May 2018 17:46:32 +0800 Subject: [PATCH 0860/1291] usb: dwc2: fix isoc split in transfer with no data [ Upstream commit 70c3c8cb83856758025c2a211dd022bc0478922a ] If isoc split in transfer with no data (the length of DATA0 packet is zero), we can't simply return immediately. Because the DATA0 can be the first transaction or the second transaction for the isoc split in transaction. If the DATA0 packet with no data is in the first transaction, we can return immediately. But if the DATA0 packet with no data is in the second transaction of isoc split in transaction sequence, we need to increase the qtd->isoc_frame_index and giveback urb to device driver if needed, otherwise, the MDATA packet will be lost. A typical test case is that connect the dwc2 controller with an usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics headset) into the downstream port of Hub. Then use the usb mic to record, we can find noise when playback. In the case, the isoc split in transaction sequence like this: - SSPLIT IN transaction - CSPLIT IN transaction - MDATA packet (176 bytes) - CSPLIT IN transaction - DATA0 packet (0 byte) This patch use both the length of DATA0 and qtd->isoc_split_offset to check if the DATA0 is in the second transaction. Tested-by: Gevorg Sahakyan Tested-by: Heiko Stuebner Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_intr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index 5f72140ff824..17905ba1139c 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -923,9 +923,8 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg, frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index]; len = dwc2_get_actual_xfer_length(hsotg, chan, chnum, qtd, DWC2_HC_XFER_COMPLETE, NULL); - if (!len) { + if (!len && !qtd->isoc_split_offset) { qtd->complete_split = 0; - qtd->isoc_split_offset = 0; return 0; } -- GitLab From 66cae9b5a1d92076b62d47dc102391f8f0493450 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 25 May 2018 17:24:57 +0800 Subject: [PATCH 0861/1291] usb: gadget: composite: fix delayed_status race condition when set_interface [ Upstream commit 980900d6318066b9f8314bfb87329a20fd0d1ca4 ] It happens when enable debug log, if set_alt() returns USB_GADGET_DELAYED_STATUS and usb_composite_setup_continue() is called before increasing count of @delayed_status, so fix it by using spinlock of @cdev->lock. Signed-off-by: Chunfeng Yun Tested-by: Jay Hsu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 940de04ed72a..b805962f5154 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1720,6 +1720,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) */ if (w_value && !f->get_alt) break; + + spin_lock(&cdev->lock); value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { DBG(cdev, @@ -1729,6 +1731,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) DBG(cdev, "delayed_status count %d\n", cdev->delayed_status); } + spin_unlock(&cdev->lock); break; case USB_REQ_GET_INTERFACE: if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) -- GitLab From 6958ade76b364a87a9d6a534255fa6d35ce9890c Mon Sep 17 00:00:00 2001 From: Grigor Tovmasyan Date: Thu, 24 May 2018 18:22:30 +0400 Subject: [PATCH 0862/1291] usb: gadget: dwc2: fix memory leak in gadget_init() [ Upstream commit 9bb073a053f0464ea74a4d4c331fdb7da58568d6 ] Freed allocated request for ep0 to prevent memory leak in case when dwc2_driver_probe() failed. Cc: Stefan Wahren Cc: Marek Szyprowski Tested-by: Stefan Wahren Tested-by: Marek Szyprowski Acked-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 7c6aa505fb68..53f97bc3e11b 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4717,9 +4717,11 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) } ret = usb_add_gadget_udc(dev, &hsotg->gadget); - if (ret) + if (ret) { + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, + hsotg->ctrl_req); return ret; - + } dwc2_hsotg_dump(hsotg); return 0; @@ -4732,6 +4734,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg) { usb_del_gadget_udc(&hsotg->gadget); + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, hsotg->ctrl_req); return 0; } -- GitLab From 72bc7a2f77ba0993815947735d234b6e5c63eb6c Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 23 May 2018 16:24:44 +0400 Subject: [PATCH 0863/1291] dwc2: gadget: Fix ISOC IN DDMA PID bitfield value calculation [ Upstream commit 1d8e5c00275825fc42aaa5597dab1d0b5b26bb64 ] PID bitfield in descriptor should be set based on particular request length, not based on EP's mc value. PID value can't be set to 0 even request length is 0. Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 53f97bc3e11b..e164439b2154 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -848,6 +848,7 @@ static int dwc2_gadget_fill_isoc_desc(struct dwc2_hsotg_ep *hs_ep, u32 index; u32 maxsize = 0; u32 mask = 0; + u8 pid = 0; maxsize = dwc2_gadget_get_desc_params(hs_ep, &mask); if (len > maxsize) { @@ -893,7 +894,11 @@ static int dwc2_gadget_fill_isoc_desc(struct dwc2_hsotg_ep *hs_ep, ((len << DEV_DMA_NBYTES_SHIFT) & mask)); if (hs_ep->dir_in) { - desc->status |= ((hs_ep->mc << DEV_DMA_ISOC_PID_SHIFT) & + if (len) + pid = DIV_ROUND_UP(len, hs_ep->ep.maxpacket); + else + pid = 1; + desc->status |= ((pid << DEV_DMA_ISOC_PID_SHIFT) & DEV_DMA_ISOC_PID_MASK) | ((len % hs_ep->ep.maxpacket) ? DEV_DMA_SHORT : 0) | -- GitLab From 395c67c1c02d6282a61224139560050c9d238479 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Fri, 15 Jun 2018 07:34:52 +0800 Subject: [PATCH 0864/1291] xen: add error handling for xenbus_printf [ Upstream commit 84c029a73327cef571eaa61c7d6e67e8031b52ec ] When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/manage.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c425d03d37d2..587d12829925 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -292,8 +292,15 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path, return; } - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + if (sysrq_key != '\0') { + err = xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + if (err) { + pr_err("%s: Error %d writing sysrq in control/sysrq\n", + __func__, err); + xenbus_transaction_end(xbt, 1); + return; + } + } err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) @@ -345,7 +352,12 @@ static int setup_shutdown_watcher(void) continue; snprintf(node, FEATURE_PATH_SIZE, "feature-%s", shutdown_handlers[idx].command); - xenbus_printf(XBT_NIL, "control", node, "%u", 1); + err = xenbus_printf(XBT_NIL, "control", node, "%u", 1); + if (err) { + pr_err("%s: Error %d writing %s\n", __func__, + err, node); + return err; + } } return 0; -- GitLab From 586ae5694e375ceced8abafe1501eece4e922852 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Jun 2018 15:58:45 -0400 Subject: [PATCH 0865/1291] pNFS: Always free the session slot on error in nfs4_layoutget_handle_exception [ Upstream commit 2dbf8dffbf35fd8f611083b9d9fe74fdccf912a3 ] Right now, we can call nfs_commit_inode() while holding the session slot, which could lead to NFSv4 deadlocks. Ensure we only keep the slot if the server returned a layout that we have to process. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 43fbf4495090..51deff8e1f86 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8429,6 +8429,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); + nfs4_sequence_free_slot(&lgp->res.seq_res); + switch (nfs4err) { case 0: goto out; @@ -8493,7 +8495,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } - nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) @@ -8619,20 +8620,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) if (IS_ERR(task)) return ERR_CAST(task); status = rpc_wait_for_completion_task(task); - if (status == 0) { + if (status != 0) + goto out; + + /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ + if (task->tk_status < 0 || lgp->res.layoutp->len == 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; - } - + } else + lseg = pnfs_layout_process(lgp); +out: trace_nfs4_layoutget(lgp->args.ctx, &lgp->args.range, &lgp->res.range, &lgp->res.stateid, status); - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (status == 0 && lgp->res.layoutp->len) - lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) -- GitLab From 91e1bb748bcc9fb1737eacb1c26abd62a2ae7076 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Sat, 16 Jun 2018 01:05:01 +0800 Subject: [PATCH 0866/1291] scsi: xen-scsifront: add error handling for xenbus_printf [ Upstream commit 93efbd39870474cc536b9caf4a6efeb03b0bc56f ] When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/xen-scsifront.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 36f59a1be7e9..61389bdc7926 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -654,10 +654,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc) static int scsifront_sdev_configure(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) { + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + return err; + } + } return 0; } @@ -665,10 +672,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev) static void scsifront_sdev_destroy(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + } } static struct scsi_host_template scsifront_sht = { @@ -1003,9 +1015,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) if (scsi_add_device(info->host, chn, tgt, lun)) { dev_err(&dev->dev, "scsi_add_device\n"); - xenbus_printf(XBT_NIL, dev->nodename, + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); } break; case VSCSIFRONT_OP_DEL_LUN: @@ -1019,10 +1034,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) } break; case VSCSIFRONT_OP_READD_LUN: - if (device_state == XenbusStateConnected) - xenbus_printf(XBT_NIL, dev->nodename, + if (device_state == XenbusStateConnected) { + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); + } break; default: break; -- GitLab From 2e62d135dd530d7144f55c6ce84e2ec29f48596a Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Sat, 16 Jun 2018 08:14:37 +0800 Subject: [PATCH 0867/1291] xen/scsiback: add error handling for xenbus_printf [ Upstream commit 7c63ca24c878e0051c91904b72174029320ef4bd ] When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-scsiback.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 7bc88fd43cfc..e2f3e8b0fba9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1012,6 +1012,7 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, { struct v2p_entry *entry; unsigned long flags; + int err; if (try) { spin_lock_irqsave(&info->v2p_lock, flags); @@ -1027,8 +1028,11 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, scsiback_del_translation_entry(info, vir); } } else if (!try) { - xenbus_printf(XBT_NIL, info->dev->nodename, state, + err = xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); } } @@ -1067,8 +1071,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent); val = xenbus_read(XBT_NIL, dev->nodename, str, NULL); if (IS_ERR(val)) { - xenbus_printf(XBT_NIL, dev->nodename, state, + err = xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); return; } strlcpy(phy, val, VSCSI_NAMELEN); @@ -1079,8 +1086,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun); if (XENBUS_EXIST_ERR(err)) { - xenbus_printf(XBT_NIL, dev->nodename, state, + err = xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); return; } -- GitLab From a929f067a88f0f9a2b02e328f746007f645fa920 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 12 Jun 2018 13:08:40 +0200 Subject: [PATCH 0868/1291] arm64: dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag [ Upstream commit dd65a941f6ba473a5cb9d013d57fa43b48450a04 ] dma_alloc_*() buffers might be exposed to userspace via mmap() call, so they should be cleared on allocation. In case of IOMMU-based dma-mapping implementation such buffer clearing was missing in the code path for DMA_ATTR_FORCE_CONTIGUOUS flag handling, because dma_alloc_from_contiguous() doesn't honor __GFP_ZERO flag. This patch fixes this issue. For more information on clearing buffers allocated by dma_alloc_* functions, see commit 6829e274a623 ("arm64: dma-mapping: always clear allocated buffers"). Fixes: 44176bb38fa4 ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU") Signed-off-by: Marek Szyprowski Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 614af886b7ef..58470b151bc3 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -629,13 +629,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, size >> PAGE_SHIFT); return NULL; } - if (!coherent) - __dma_flush_area(page_to_virt(page), iosize); - addr = dma_common_contiguous_remap(page, size, VM_USERMAP, prot, __builtin_return_address(0)); - if (!addr) { + if (addr) { + memset(addr, 0, size); + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + } else { iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); -- GitLab From d22240dd6ab34a69d8e6f07fd74bd53270b554fa Mon Sep 17 00:00:00 2001 From: Zhizhou Zhang Date: Tue, 12 Jun 2018 17:07:37 +0800 Subject: [PATCH 0869/1291] arm64: make secondary_start_kernel() notrace [ Upstream commit b154886f7892499d0d3054026e19dfb9a731df61 ] We can't call function trace hook before setup percpu offset. When entering secondary_start_kernel(), percpu offset has not been initialized. So this lead hotplug malfunction. Here is the flow to reproduce this bug: echo 0 > /sys/devices/system/cpu/cpu1/online echo function > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_on echo 1 > /sys/devices/system/cpu/cpu1/online Acked-by: Mark Rutland Tested-by: Suzuki K Poulose Signed-off-by: Zhizhou Zhang Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9f7195a5773e..b7ad41d7b6ee 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -214,7 +214,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void secondary_start_kernel(void) +asmlinkage notrace void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu; -- GitLab From 26c6ffff8eec39bb8b767a81c7d8f28f54712919 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:00 -0700 Subject: [PATCH 0870/1291] qed: Fix possible memory leak in Rx error path handling. [ Upstream commit 4f9de4df901fb84709fe3a864dfa4eaf35700f68 ] Memory for packet buffers need to be freed in the error paths as there is no consumer (e.g., upper layer) for such packets and that memory will never get freed. The issue was uncovered when port was attacked with flood of isatap packets, these are multicast packets hence were directed at all the PFs. For foce PF, this meant they were routed to the ll2 module which in turn drops such packets. Fixes: 0a7fb11c ("qed: Add Light L2 support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index c06ad4f0758e..5f52f14761a3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -201,8 +201,9 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data) skb = build_skb(buffer->data, 0); if (!skb) { - rc = -ENOMEM; - goto out_post; + DP_INFO(cdev, "Failed to build SKB\n"); + kfree(buffer->data); + goto out_post1; } data->u.placement_offset += NET_SKB_PAD; @@ -224,8 +225,14 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data) cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb, data->opaque_data_0, data->opaque_data_1); + } else { + DP_VERBOSE(p_hwfn, (NETIF_MSG_RX_STATUS | NETIF_MSG_PKTDATA | + QED_MSG_LL2 | QED_MSG_STORAGE), + "Dropping the packet\n"); + kfree(buffer->data); } +out_post1: /* Update Buffer information and update FW producer */ buffer->data = new_data; buffer->phys_addr = new_phys_addr; -- GitLab From 3b29b09a53abd1c8c9d32a8129a47372a93dd15a Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:01 -0700 Subject: [PATCH 0871/1291] qed: Add sanity check for SIMD fastpath handler. [ Upstream commit 3935a70968820c3994db4de7e6e1c7e814bff875 ] Avoid calling a SIMD fastpath handler if it is NULL. The check is needed to handle an unlikely scenario where unsolicited interrupt is destined to a PF in INTa mode. Fixes: fe56b9e6a ("qed: Add module with basic common support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 2c958921dfb3..954f7ce4cf28 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -565,8 +565,16 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) /* Fastpath interrupts */ for (j = 0; j < 64; j++) { if ((0x2ULL << j) & status) { - hwfn->simd_proto_handler[j].func( - hwfn->simd_proto_handler[j].token); + struct qed_simd_fp_handler *p_handler = + &hwfn->simd_proto_handler[j]; + + if (p_handler->func) + p_handler->func(p_handler->token); + else + DP_NOTICE(hwfn, + "Not calling fastpath handler as it is NULL [handler #%d, status 0x%llx]\n", + j, status); + status &= ~(0x2ULL << j); rc = IRQ_HANDLED; } -- GitLab From 9418ea8cf216abcbed3f3a5a2cea755d3aaa415b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:02 -0700 Subject: [PATCH 0872/1291] qed: Do not advertise DCBX_LLD_MANAGED capability. [ Upstream commit ff54d5cd9ec15546abc870452dd0b66eef4b4606 ] Do not advertise DCBX_LLD_MANAGED capability i.e., do not allow external agent to manage the dcbx/lldp negotiation. MFW acts as lldp agent for qed* devices, and no other lldp agent is allowed to coexist with mfw. Also updated a debug print, to not to display the redundant info. Fixes: a1d8d8a51 ("qed: Add dcbnl support.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index b306961b02fd..d62dccb85539 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -255,9 +255,8 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, *type = DCBX_PROTOCOL_ROCE_V2; } else { *type = DCBX_MAX_PROTOCOL_TYPE; - DP_ERR(p_hwfn, - "No action required, App TLV id = 0x%x app_prio_bitmap = 0x%x\n", - id, app_prio_bitmap); + DP_ERR(p_hwfn, "No action required, App TLV entry = 0x%x\n", + app_prio_bitmap); return false; } @@ -1472,8 +1471,8 @@ static u8 qed_dcbnl_getcap(struct qed_dev *cdev, int capid, u8 *cap) *cap = 0x80; break; case DCB_CAP_ATTR_DCBX: - *cap = (DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_CEE | - DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_STATIC); + *cap = (DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_VER_IEEE | + DCB_CAP_DCBX_STATIC); break; default: *cap = false; @@ -1541,8 +1540,6 @@ static u8 qed_dcbnl_getdcbx(struct qed_dev *cdev) if (!dcbx_info) return 0; - if (dcbx_info->operational.enabled) - mode |= DCB_CAP_DCBX_LLD_MANAGED; if (dcbx_info->operational.ieee) mode |= DCB_CAP_DCBX_VER_IEEE; if (dcbx_info->operational.cee) -- GitLab From fb24c6df0f8ec986894b77baec225679de3a0d78 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 19 Jun 2018 08:15:24 -0700 Subject: [PATCH 0873/1291] enic: initialize enic->rfs_h.lock in enic_probe [ Upstream commit 3256d29fc7aecdf99feb1cb9475ed2252769a8a7 ] lockdep spotted that we are using rfs_h.lock in enic_get_rxnfc() without initializing. rfs_h.lock is initialized in enic_open(). But ethtool_ops can be called when interface is down. Move enic_rfs_flw_tbl_init to enic_probe. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 18 PID: 1189 Comm: ethtool Not tainted 4.17.0-rc7-devel+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: dump_stack+0x85/0xc0 register_lock_class+0x550/0x560 ? __handle_mm_fault+0xa8b/0x1100 __lock_acquire+0x81/0x670 lock_acquire+0xb9/0x1e0 ? enic_get_rxnfc+0x139/0x2b0 [enic] _raw_spin_lock_bh+0x38/0x80 ? enic_get_rxnfc+0x139/0x2b0 [enic] enic_get_rxnfc+0x139/0x2b0 [enic] ethtool_get_rxnfc+0x8d/0x1c0 dev_ethtool+0x16c8/0x2400 ? __mutex_lock+0x64d/0xa00 ? dev_load+0x6a/0x150 dev_ioctl+0x253/0x4b0 sock_do_ioctl+0x9a/0x130 sock_ioctl+0x1af/0x350 do_vfs_ioctl+0x8e/0x670 ? syscall_trace_enter+0x1e2/0x380 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5a/0x170 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_clsf.c | 3 +-- drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 8dc21c9f9716..df613a87ccff 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -79,7 +79,6 @@ void enic_rfs_flw_tbl_init(struct enic *enic) enic->rfs_h.max = enic->config.num_arfs; enic->rfs_h.free = enic->rfs_h.max; enic->rfs_h.toclean = 0; - enic_rfs_timer_start(enic); } void enic_rfs_flw_tbl_free(struct enic *enic) @@ -88,7 +87,6 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_rfs_timer_stop(enic); spin_lock_bh(&enic->rfs_h.lock); - enic->rfs_h.free = 0; for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) { struct hlist_head *hhead; struct hlist_node *tmp; @@ -99,6 +97,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_delfltr(enic, n->fltr_id); hlist_del(&n->node); kfree(n); + enic->rfs_h.free++; } } spin_unlock_bh(&enic->rfs_h.lock); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index a03a32a4ffca..800edfbd36c1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1930,7 +1930,7 @@ static int enic_open(struct net_device *netdev) vnic_intr_unmask(&enic->intr[i]); enic_notify_timer_start(enic); - enic_rfs_flw_tbl_init(enic); + enic_rfs_timer_start(enic); return 0; @@ -2854,6 +2854,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) enic->notify_timer.function = enic_notify_timer; enic->notify_timer.data = (unsigned long)enic; + enic_rfs_flw_tbl_init(enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); -- GitLab From 7c828ea375a19f84f3f1c0546adc05766102aa6f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 23:40:53 +0200 Subject: [PATCH 0874/1291] net: hamradio: use eth_broadcast_addr [ Upstream commit 4e8439aa34802deab11cee68b0ecb18f887fb153 ] The array bpq_eth_addr is only used to get the size of an address, whereas the bcast_addr is used to set the broadcast address. This leads to a warning when using clang: drivers/net/hamradio/bpqether.c:94:13: warning: variable 'bpq_eth_addr' is not needed and will not be emitted [-Wunneeded-internal-declaration] static char bpq_eth_addr[6]; ^ Remove both variables and use the common eth_broadcast_addr to set the broadcast address. Signed-off-by: Stefan Agner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/bpqether.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 78a6414c5fd9..7d94a7842557 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -89,10 +89,6 @@ static const char banner[] __initconst = KERN_INFO \ "AX.25: bpqether driver version 004\n"; -static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; - -static char bpq_eth_addr[6]; - static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); @@ -515,8 +511,8 @@ static int bpq_new_device(struct net_device *edev) bpq->ethdev = edev; bpq->axdev = ndev; - memcpy(bpq->dest_addr, bcast_addr, sizeof(bpq_eth_addr)); - memcpy(bpq->acpt_addr, bcast_addr, sizeof(bpq_eth_addr)); + eth_broadcast_addr(bpq->dest_addr); + eth_broadcast_addr(bpq->acpt_addr); err = register_netdevice(ndev); if (err) -- GitLab From e0eaae3872840ab8b57457da9b2a82551136168d Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Jun 2018 17:23:17 +0800 Subject: [PATCH 0875/1291] net: propagate dev_get_valid_name return code [ Upstream commit 7892bd081045222b9e4027fec279a28d6fe7aa66 ] if dev_get_valid_name failed, propagate its return code and remove the setting err to ENODEV, it will be set to 0 again before dev_change_net_namespace exits. Signed-off-by: Li RongQing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6ca771f2f25b..85f4a1047707 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8297,7 +8297,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(net, dev, pat) < 0) + err = dev_get_valid_name(net, dev, pat); + if (err < 0) goto out; } @@ -8309,7 +8310,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_close(dev); /* And unlink it from device chain */ - err = -ENODEV; unlist_netdevice(dev); synchronize_net(); -- GitLab From 62e4c9049e869e34bcc321aac6a16c79c30946e4 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 19 Jun 2018 10:35:38 -0500 Subject: [PATCH 0876/1291] net: stmmac: socfpga: add additional ocp reset line for Stratix10 [ Upstream commit bc8a2d9bcbf1ca548b1deb315d14e1da81945bea ] The Stratix10 platform has an additional reset line, OCP(Open Core Protocol), that also needs to get deasserted for the stmmac ethernet controller to work. Thus we need to update the Kconfig to include ARCH_STRATIX10 in order to build dwmac-socfpga. Also, remove the redundant check for the reset controller pointer. The reset driver already checks for the pointer and returns 0 if the pointer is NULL. Signed-off-by: Dinh Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 97035766c291..5790cd61436d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -111,7 +111,7 @@ config DWMAC_ROCKCHIP config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" default ARCH_SOCFPGA - depends on OF && (ARCH_SOCFPGA || COMPILE_TEST) + depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST) select MFD_SYSCON help Support for ethernet controller on Altera SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 6e359572b9f0..5b3b06a0a3bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -55,6 +55,7 @@ struct socfpga_dwmac { struct device *dev; struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; + struct reset_control *stmmac_ocp_rst; void __iomem *splitter_base; bool f2h_ptp_ref_clk; struct tse_pcs pcs; @@ -262,8 +263,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; /* Assert reset to the enet controller before changing the phy mode */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); + reset_control_assert(dwmac->stmmac_ocp_rst); + reset_control_assert(dwmac->stmmac_rst); regmap_read(sys_mgr_base_addr, reg_offset, &ctrl); ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift); @@ -288,8 +289,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) /* Deassert reset for the phy configuration to be sampled by * the enet controller, and operation to start in requested mode */ - if (dwmac->stmmac_rst) - reset_control_deassert(dwmac->stmmac_rst); + reset_control_deassert(dwmac->stmmac_ocp_rst); + reset_control_deassert(dwmac->stmmac_rst); if (phymode == PHY_INTERFACE_MODE_SGMII) { if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { dev_err(dwmac->dev, "Unable to initialize TSE PCS"); @@ -324,6 +325,15 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp"); + if (IS_ERR(dwmac->stmmac_ocp_rst)) { + ret = PTR_ERR(dwmac->stmmac_ocp_rst); + dev_err(dev, "error getting reset control of ocp %d\n", ret); + goto err_remove_config_dt; + } + + reset_control_deassert(dwmac->stmmac_ocp_rst); + ret = socfpga_dwmac_parse_data(dwmac, dev); if (ret) { dev_err(dev, "Unable to parse OF data\n"); -- GitLab From 955887c1fe90a2be056358001fd4920614a0f8b6 Mon Sep 17 00:00:00 2001 From: Max Gurtuvoy Date: Tue, 19 Jun 2018 15:45:33 +0300 Subject: [PATCH 0877/1291] nvmet: reset keep alive timer in controller enable [ Upstream commit d68a90e148f5a82aa67654c5012071e31c0e4baa ] Controllers that are not yet enabled should not really enforce keep alive timeouts, but we still want to track a timeout and cleanup in case a host died before it enabled the controller. Hence, simply reset the keep alive timer when the controller is enabled. Suggested-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 240b0d628222..5fa7856f6b34 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -598,6 +598,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) } ctrl->csts = NVME_CSTS_RDY; + + /* + * Controllers that are not yet enabled should not really enforce the + * keep alive timeout, but we still want to track a timeout and cleanup + * in case a host died before it enabled the controller. Hence, simply + * reset the keep alive timer when the controller is enabled. + */ + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) -- GitLab From 6d2b87505f7a59b464a59e1d0870aa26d0b1f0e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jun 2018 13:41:51 +0300 Subject: [PATCH 0878/1291] block: sed-opal: Fix a couple off by one bugs [ Upstream commit ce042c183bcb94eb2919e8036473a1fc203420f9 ] resp->num is the number of tokens in resp->tok[]. It gets set in response_parse(). So if n == resp->num then we're reading beyond the end of the data. Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Reviewed-by: Scott Bauer Tested-by: Scott Bauer Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/sed-opal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 9ed51d0c6b1d..4f5e70d4abc3 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n, return 0; } - if (n > resp->num) { + if (n >= resp->num) { pr_debug("Response has %d tokens. Can't access %d\n", resp->num, n); return 0; @@ -899,7 +899,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n) return 0; } - if (n > resp->num) { + if (n >= resp->num) { pr_debug("Response has %d tokens. Can't access %d\n", resp->num, n); return 0; -- GitLab From fbeb2ee3ecca6ad25957a08e9ae69cb67071528d Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 29 Nov 2017 11:21:45 +0300 Subject: [PATCH 0879/1291] ARC: Enable machine_desc->init_per_cpu for !CONFIG_SMP [ Upstream commit 2f24ef7413a4d91657ef04e77c27ce0b313e6c95 ] machine_desc->init_per_cpu() hook is supposed to be per cpu initialization and would seem to apply equally to UP and/or SMP. Infact the comment in header file seems to suggest it works for UP too, which was not the case and this patch. This enables !CONFIG_SMP build for platforms such as hsdk. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: trimmeed changelog] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/mach_desc.h | 2 -- arch/arc/kernel/irq.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index c28e6c347b49..871f3cb16af9 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -34,9 +34,7 @@ struct machine_desc { const char *name; const char **dt_compat; void (*init_early)(void); -#ifdef CONFIG_SMP void (*init_per_cpu)(unsigned int); -#endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 538b36afe89e..62b185057c04 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -31,10 +31,10 @@ void __init init_IRQ(void) /* a SMP H/w block could do IPI IRQ request here */ if (plat_smp_ops.init_per_cpu) plat_smp_ops.init_per_cpu(smp_processor_id()); +#endif if (machine_desc->init_per_cpu) machine_desc->init_per_cpu(smp_processor_id()); -#endif } /* -- GitLab From 9b0b625841d89e94f7c5321a2bfe8ccbea8b8bb1 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Fri, 15 Jun 2018 14:05:32 -0700 Subject: [PATCH 0880/1291] nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag. [ Upstream commit 08ba91ee6e2c1c08d3f0648f978cbb5dbf3491d8 ] If NBD_DISCONNECT_ON_CLOSE is set on a device, then the driver will issue a disconnect from nbd_release if the device has no remaining bdev->bd_openers. Fix ret val so reconfigure with only setting the flag succeeds. Reviewed-by: Josef Bacik Signed-off-by: Doron Roberts-Kedes Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 40 +++++++++++++++++++++++++++++++++------- include/uapi/linux/nbd.h | 3 +++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6fb64e73bc96..5feba04ab940 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -76,6 +76,7 @@ struct link_dead_args { #define NBD_HAS_CONFIG_REF 4 #define NBD_BOUND 5 #define NBD_DESTROY_ON_DISCONNECT 6 +#define NBD_DISCONNECT_ON_CLOSE 7 struct nbd_config { u32 flags; @@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); static void nbd_dead_link_work(struct work_struct *work); +static void nbd_disconnect_and_put(struct nbd_device *nbd); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -1291,6 +1293,12 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + + if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && + bdev->bd_openers == 0) + nbd_disconnect_and_put(nbd); + nbd_config_put(nbd); nbd_put(nbd); } @@ -1690,6 +1698,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) &config->runtime_flags); put_dev = true; } + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -1734,6 +1746,16 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) return ret; } +static void nbd_disconnect_and_put(struct nbd_device *nbd) +{ + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +} + static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) { struct nbd_device *nbd; @@ -1766,12 +1788,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return 0; } - mutex_lock(&nbd->config_lock); - nbd_disconnect(nbd); - mutex_unlock(&nbd->config_lock); - if (test_and_clear_bit(NBD_HAS_CONFIG_REF, - &nbd->config->runtime_flags)) - nbd_config_put(nbd); + nbd_disconnect_and_put(nbd); nbd_config_put(nbd); nbd_put(nbd); return 0; @@ -1782,7 +1799,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) struct nbd_device *nbd = NULL; struct nbd_config *config; int index; - int ret = -EINVAL; + int ret = 0; bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -1822,6 +1839,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) !nbd->task_recv) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); + ret = -EINVAL; goto out; } @@ -1846,6 +1864,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) &config->runtime_flags)) refcount_inc(&nbd->refs); } + + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } else { + clear_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 85a3fb65e40a..20d6cc91435d 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -53,6 +53,9 @@ enum { /* These are client behavior specific flags. */ #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on disconnect. */ +#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on + * close by last opener. + */ /* userspace doesn't need the nbd_device structure */ -- GitLab From 363b8de31ac91621865c3bc4e9838be86f64519f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 Jun 2018 10:03:56 +0200 Subject: [PATCH 0881/1291] net: davinci_emac: match the mdio device against its compatible if possible [ Upstream commit ea0820bb771175c7d4192fc6f5b5c56b3c6d5239 ] Device tree based systems without of_dev_auxdata will have the mdio device named differently than "davinci_mdio(.0)". In this case use the device's parent's compatible string for matching Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/davinci_emac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4bb561856af5..47a096134043 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1387,6 +1387,10 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) static int match_first_device(struct device *dev, void *data) { + if (dev->parent && dev->parent->of_node) + return of_device_is_compatible(dev->parent->of_node, + "ti,davinci_mdio"); + return !strncmp(dev_name(dev), "davinci_mdio", 12); } -- GitLab From 8fb20507bca0f610b44e5c38c9052a03f8225dfe Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 Jun 2018 12:47:52 -0300 Subject: [PATCH 0882/1291] sctp: fix erroneous inc of snmp SctpFragUsrMsgs [ Upstream commit fedb1bd3d274b33c432cb83c80c6b3cf54d509c8 ] Currently it is incrementing SctpFragUsrMsgs when the user message size is of the exactly same size as the maximum fragment size, which is wrong. The fix is to increment it only when user message is bigger than the maximum fragment size. Fixes: bfd2e4b8734d ("sctp: refactor sctp_datamsg_from_user") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/chunk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3afac275ee82..acd9380a56fb 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -230,7 +230,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* Account for a different sized first fragment */ if (msg_len >= first_len) { msg->can_delay = 0; - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); + if (msg_len > first_len) + SCTP_INC_STATS(sock_net(asoc->base.sk), + SCTP_MIB_FRAGUSRMSGS); } else { /* Which may be the only one... */ first_len = msg_len; -- GitLab From 7a21294b84fa7df3b4a894cbde4ca7fc2dfd3047 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jun 2018 17:06:28 +0200 Subject: [PATCH 0883/1291] KVM: arm/arm64: Drop resource size check for GICV window [ Upstream commit ba56bc3a0786992755e6804fbcbdc60ef6cfc24c ] When booting a 64 KB pages kernel on a ACPI GICv3 system that implements support for v2 emulation, the following warning is produced GICV size 0x2000 not a multiple of page size 0x10000 and support for v2 emulation is disabled, preventing GICv2 VMs from being able to run on such hosts. The reason is that vgic_v3_probe() performs a sanity check on the size of the window (it should be a multiple of the page size), while the ACPI MADT parsing code hardcodes the size of the window to 8 KB. This makes sense, considering that ACPI does not bother to describe the size in the first place, under the assumption that platforms implementing ACPI will follow the architecture and not put anything else in the same 64 KB window. So let's just drop the sanity check altogether, and assume that the window is at least 64 KB in size. Fixes: 909777324588 ("KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init") Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-v3.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b4fcd52f14c..a37b03c25457 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -492,11 +492,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info) pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)info->vcpu.start); kvm_vgic_global_state.vcpu_base = 0; - } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(&info->vcpu), - PAGE_SIZE); - kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; -- GitLab From 097a82cb1a71f977a29b888d66620edac9d7c932 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Tue, 23 Jan 2018 12:17:19 +0100 Subject: [PATCH 0884/1291] drm/bridge/sii8620: fix display of packed pixel modes in MHL2 [ Upstream commit e8b92efa629dac0e70ea4145c5e70616de5f89c8 ] Currently packed pixel modes in MHL2 can't be displayed. The device automatically recognizes output format, so setting format other than RGB causes failure. Fix it by writing proper values to registers. Tested on MHL1 and MHL2 using various vendors' dongles both in DVI and HDMI mode. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1516706239-9104-1-git-send-email-m.purski@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/sil-sii8620.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 8a8c3ede8821..33bd5d80aa51 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1038,20 +1038,11 @@ static void sii8620_stop_video(struct sii8620 *ctx) static void sii8620_set_format(struct sii8620 *ctx) { - u8 out_fmt; - if (sii8620_is_mhl3(ctx)) { sii8620_setbits(ctx, REG_M3_P0CTRL, BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED, ctx->use_packed_pixel ? ~0 : 0); } else { - if (ctx->use_packed_pixel) - sii8620_write_seq_static(ctx, - REG_VID_MODE, BIT_VID_MODE_M1080P, - REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1, - REG_MHLTX_CTL6, 0x60 - ); - else sii8620_write_seq_static(ctx, REG_VID_MODE, 0, REG_MHL_TOP_CTL, 1, @@ -1059,15 +1050,9 @@ static void sii8620_set_format(struct sii8620 *ctx) ); } - if (ctx->use_packed_pixel) - out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL) | - BIT_TPI_OUTPUT_CSCMODE709; - else - out_fmt = VAL_TPI_FORMAT(RGB, FULL); - sii8620_write_seq(ctx, REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL), - REG_TPI_OUTPUT, out_fmt, + REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL), ); } -- GitLab From c55300fcac1c15eb065eac626e17bd7b6d110346 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 4 Apr 2018 14:06:30 -0400 Subject: [PATCH 0885/1291] locking/lockdep: Do not record IRQ state within lockdep code [ Upstream commit fcc784be837714a9173b372ff9fb9b514590dad9 ] While debugging where things were going wrong with mapping enabling/disabling interrupts with the lockdep state and actual real enabling and disabling interrupts, I had to silent the IRQ disabling/enabling in debug_check_no_locks_freed() because it was always showing up as it was called before the splat was. Use raw_local_irq_save/restore() for not only debug_check_no_locks_freed() but for all internal lockdep functions, as they hide useful information about where interrupts were used incorrectly last. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180404140630.3f4f4c7a@gandalf.local.home Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2f0f5720b123..d7c155048ea9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1296,11 +1296,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -1323,11 +1323,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -4478,7 +4478,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) if (unlikely(!debug_locks)) return; - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; @@ -4489,7 +4489,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -- GitLab From a17ea7fb07acfe4fe2da93a42849137c29740b53 Mon Sep 17 00:00:00 2001 From: Jeffrin Jose T Date: Thu, 21 Jun 2018 22:30:20 +0530 Subject: [PATCH 0886/1291] selftests: bpf: notification about privilege required to run test_kmod.sh testing script [ Upstream commit 81e167c2a216e7b54e6add9d2badcda267fe33b1 ] The test_kmod.sh script require root privilege for the successful execution of the test. This patch is to notify the user about the privilege the script demands for the successful execution of the test. Signed-off-by: Jeffrin Jose T (Rajagiri SET) Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_kmod.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index ed4774d8d6ed..0f7b9aa9c6a5 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -1,6 +1,15 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ "$(id -u)" != "0" ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + SRC_TREE=../../../../ test_run() -- GitLab From 99654c9ffcea2d835283aa8c570432bd03ca6943 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 09:04:25 +0200 Subject: [PATCH 0887/1291] mtd: dataflash: Use ULL suffix for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cbdceb9b3e1928554fffd0d889adf2d0d8edee4d ] With gcc 4.1.2 when compiling for 32-bit: drivers/mtd/devices/mtd_dataflash.c:736: warning: integer constant is too large for ‘long’ type drivers/mtd/devices/mtd_dataflash.c:737: warning: integer constant is too large for ‘long’ type Add the missing "ULL" suffixes to fix this. Fixes: 67e4145ebf2c161d ("mtd: dataflash: Add flash_info for AT45DB641E") Signed-off-by: Geert Uytterhoeven Acked-by: Andrey Smirnov Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/mtd_dataflash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 5dc8bd042cc5..504e34f29518 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -737,8 +737,8 @@ static struct flash_info dataflash_data[] = { { "AT45DB642x", 0x1f2800, 8192, 1056, 11, SUP_POW2PS}, { "at45db642d", 0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS}, - { "AT45DB641E", 0x1f28000100, 32768, 264, 9, SUP_EXTID | SUP_POW2PS}, - { "at45db641e", 0x1f28000100, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS}, + { "AT45DB641E", 0x1f28000100ULL, 32768, 264, 9, SUP_EXTID | SUP_POW2PS}, + { "at45db641e", 0x1f28000100ULL, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS}, }; static struct flash_info *jedec_lookup(struct spi_device *spi, -- GitLab From b580cead624b5e0471cd418b3c0a6f20d15e95ba Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 22 Jun 2018 13:51:26 +0200 Subject: [PATCH 0888/1291] x86/microcode/intel: Fix memleak in save_microcode_patch() [ Upstream commit 0218c766263e70795c5eaa17d75ed54bca350950 ] Free useless ucode_patch entry when it's replaced. [ bp: Drop the memfree_patch() two-liner. ] Signed-off-by: Zhenzhong Duan Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Srinivas REDDY Eeda Link: http://lkml.kernel.org/r/888102f0-fd22-459d-b090-a1bd8a00cb2b@default Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 1c2cfa0644aa..97ccf4c3b45b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size) p = memdup_patch(data, size); if (!p) pr_err("Error allocating buffer %p\n", data); - else + else { list_replace(&iter->plist, &p->plist); + kfree(iter->data); + kfree(iter); + } } } -- GitLab From f8e7a1bd752a73a59238389eb395eaecc636c8e9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 21 Jun 2018 19:49:36 +0800 Subject: [PATCH 0889/1291] ipv6: mcast: fix unsolicited report interval after receiving querys [ Upstream commit 6c6da92808442908287fae8ebb0ca041a52469f4 ] After recieving MLD querys, we update idev->mc_maxdelay with max_delay from query header. This make the later unsolicited reports have the same interval with mc_maxdelay, which means we may send unsolicited reports with long interval time instead of default configured interval time. Also as we will not call ipv6_mc_reset() after device up. This issue will be there even after leave the group and join other groups. Fixes: fc4eba58b4c14 ("ipv6: make unsolicited report intervals configurable for mld") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6fd913d63835..d112762b4cb8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2083,7 +2083,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev) mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } } @@ -2095,7 +2096,8 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } @@ -2453,7 +2455,8 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) - mld_ifc_start_timer(idev, idev->mc_maxdelay); + mld_ifc_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } -- GitLab From 4545cfb006b72c017b75eba3bb71deb34e290e0d Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 22 Jun 2018 10:54:45 -0700 Subject: [PATCH 0890/1291] Smack: Mark inode instant in smack_task_to_inode [ Upstream commit 7b4e88434c4e7982fb053c49657e1c8bbb8692d9 ] Smack: Mark inode instant in smack_task_to_inode /proc clean-up in commit 1bbc55131e59bd099fdc568d3aa0b42634dbd188 resulted in smack_task_to_inode() being called before smack_d_instantiate. This resulted in the smk_inode value being ignored, even while present for files in /proc/self. Marking the inode as instant here fixes that. Signed-off-by: Casey Schaufler Signed-off-by: James Morris Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/smack/smack_lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 286171a16ed2..fdf01bfd1b07 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2281,6 +2281,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) struct smack_known *skp = smk_of_task_struct(p); isp->smk_inode = skp; + isp->smk_flags |= SMK_INODE_INSTANT; } /* -- GitLab From 7634aad31a7374af49df2ad1077ad685d561a5fe Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 8 May 2018 10:09:51 -0500 Subject: [PATCH 0891/1291] arm64: dts: msm8916: fix Coresight ETF graph connections [ Upstream commit 6b4154a655a258c67bcfabbd4c3a06637e74ebcd ] The ETF input should be connected to the funnel output, and the ETF output should be connected to the replicator input. The labels are wrong and these got swapped: Warning (graph_endpoint): /soc/funnel@821000/ports/port@8/endpoint: graph connection to node '/soc/etf@825000/ports/port@1/endpoint' is not bidirectional Warning (graph_endpoint): /soc/replicator@824000/ports/port@2/endpoint: graph connection to node '/soc/etf@825000/ports/port@0/endpoint' is not bidirectional Fixes: 7c10da373698 ("arm64: dts: qcom: Add msm8916 CoreSight components") Cc: Ivan T. Ivanov Cc: Mathieu Poirier Cc: Andy Gross Cc: David Brown Cc: linux-arm-msm@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Mathieu Poirier Tested-by: Mathieu Poirier Signed-off-by: Andy Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 61da6e65900b..3cc449425a03 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1132,14 +1132,14 @@ ports { port@0 { reg = <0>; - etf_out: endpoint { + etf_in: endpoint { slave-mode; remote-endpoint = <&funnel0_out>; }; }; port@1 { reg = <0>; - etf_in: endpoint { + etf_out: endpoint { remote-endpoint = <&replicator_in>; }; }; -- GitLab From cc210a039ed1812b2ccca6741cd7ea4fa5ff4e4c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 Jun 2018 17:26:34 +0200 Subject: [PATCH 0892/1291] batman-adv: Fix bat_ogm_iv best gw refcnt after netlink dump [ Upstream commit b5685d2687d6612adf5eac519eb7008f74dfd1ec ] A reference for the best gateway is taken when the list of gateways in the mesh is sent via netlink. This is necessary to check whether the currently dumped entry is the currently selected gateway or not. This information is then transferred as flag BATADV_ATTR_FLAG_BEST. After the comparison of the current entry is done, batadv_iv_gw_dump_entry() has to decrease the reference counter again. Otherwise the reference will be held and thus prevents a proper shutdown of the batman-adv interfaces (and some of the interfaces enslaved in it). Fixes: efb766af06e3 ("batman-adv: add B.A.T.M.A.N. IV bat_gw_dump implementations") Reported-by: Andreas Ziegler Tested-by: Andreas Ziegler Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 71d8809fbe94..5bd9b389f8c9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2718,7 +2718,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; - struct batadv_gw_node *curr_gw; + struct batadv_gw_node *curr_gw = NULL; int ret = 0; void *hdr; @@ -2766,6 +2766,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, ret = 0; out: + if (curr_gw) + batadv_gw_node_put(curr_gw); if (router_ifinfo) batadv_neigh_ifinfo_put(router_ifinfo); if (router) -- GitLab From d50e1f18eacef0a50be5a20da2dc1c8cc8af544b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 Jun 2018 17:26:35 +0200 Subject: [PATCH 0893/1291] batman-adv: Fix bat_v best gw refcnt after netlink dump [ Upstream commit 9713cb0cf19f1cec6c007e3b37be0697042b6720 ] A reference for the best gateway is taken when the list of gateways in the mesh is sent via netlink. This is necessary to check whether the currently dumped entry is the currently selected gateway or not. This information is then transferred as flag BATADV_ATTR_FLAG_BEST. After the comparison of the current entry is done, batadv_v_gw_dump_entry() has to decrease the reference counter again. Otherwise the reference will be held and thus prevents a proper shutdown of the batman-adv interfaces (and some of the interfaces enslaved in it). Fixes: b71bb6f924fe ("batman-adv: add B.A.T.M.A.N. V bat_gw_dump implementations") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index a8f4c3902cf5..371a1f1651b4 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -929,7 +929,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; - struct batadv_gw_node *curr_gw; + struct batadv_gw_node *curr_gw = NULL; int ret = 0; void *hdr; @@ -997,6 +997,8 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, ret = 0; out: + if (curr_gw) + batadv_gw_node_put(curr_gw); if (router_ifinfo) batadv_neigh_ifinfo_put(router_ifinfo); if (router) -- GitLab From 44fed77fc7e45e0fe6c41286df9134d747b2bd73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 7 Jun 2018 00:46:23 +0200 Subject: [PATCH 0894/1291] batman-adv: Avoid storing non-TT-sync flags on singular entries too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4a519b83da16927fb98fd32b0f598e639d1f1859 ] Since commit 54e22f265e87 ("batman-adv: fix TT sync flag inconsistencies") TT sync flags and TT non-sync'd flags are supposed to be stored separately. The previous patch missed to apply this separation on a TT entry with only a single TT orig entry. This is a minor fix because with only a single TT orig entry the DDoS issue the former patch solves does not apply. Fixes: 54e22f265e87 ("batman-adv: fix TT sync flag inconsistencies") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 8a3ce79b1307..e14f7c6436b4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1679,7 +1679,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, ether_addr_copy(common->addr, tt_addr); common->vid = vid; - common->flags = flags; + common->flags = flags & (~BATADV_TT_SYNC_MASK); + tt_global_entry->roam_at = 0; /* node must store current time in case of roaming. This is * needed to purge this entry out on timeout (if nobody claims -- GitLab From 04954b2dca347ff9ec47ca9f0baaf5d27b384d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 7 Jun 2018 00:46:24 +0200 Subject: [PATCH 0895/1291] batman-adv: Fix multicast TT issues with bogus ROAM flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a44ebeff6bbd6ef50db41b4195fca87b21aefd20 ] When a (broken) node wrongly sends multicast TT entries with a ROAM flag then this causes any receiving node to drop all entries for the same multicast MAC address announced by other nodes, leading to packet loss. Fix this DoS vector by only storing TT sync flags. For multicast TT non-sync'ing flag bits like ROAM are unused so far anyway. Fixes: 1d8ab8d3c176 ("batman-adv: Modified forwarding behaviour for multicast packets") Reported-by: Leonardo Mörlein Signed-off-by: Linus Lüssing Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e14f7c6436b4..0f4d4eece3e4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1679,7 +1679,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, ether_addr_copy(common->addr, tt_addr); common->vid = vid; - common->flags = flags & (~BATADV_TT_SYNC_MASK); + if (!is_multicast_ether_addr(common->addr)) + common->flags = flags & (~BATADV_TT_SYNC_MASK); tt_global_entry->roam_at = 0; /* node must store current time in case of roaming. This is @@ -1743,7 +1744,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, * TT_CLIENT_TEMP, therefore they have to be copied in the * client entry */ - common->flags |= flags & (~BATADV_TT_SYNC_MASK); + if (!is_multicast_ether_addr(common->addr)) + common->flags |= flags & (~BATADV_TT_SYNC_MASK); /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a -- GitLab From 9265a4509509eed4005fda908de5a154cc8c7e41 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Sat, 23 Jun 2018 20:28:26 +0530 Subject: [PATCH 0896/1291] cxgb4: when disabling dcb set txq dcb priority to 0 [ Upstream commit 5ce36338a30f9814fc4824f9fe6c20cd83d872c7 ] When we are disabling DCB, store "0" in txq->dcb_prio since that's used for future TX Work Request "OVLAN_IDX" values. Setting non zero priority upon disabling DCB would halt the traffic. Reported-by: AMG Zollner Robert CC: David Ahern Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44a0d04dd8a0..74a42f12064b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -253,7 +253,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n", enable ? "set" : "unset", pi->port_id, i, -err); else - txq->dcb_prio = value; + txq->dcb_prio = enable ? value : 0; } } -- GitLab From dc68381f4919fc312dd7a16871304264c0d2fa1c Mon Sep 17 00:00:00 2001 From: Tomasz Duszynski Date: Mon, 28 May 2018 17:38:59 +0200 Subject: [PATCH 0897/1291] iio: pressure: bmp280: fix relative humidity unit [ Upstream commit 13399ff25f179811ce9c1df1523eb39f9e4a4772 ] According to IIO ABI relative humidity reading should be returned in milli percent. This patch addresses that by applying proper scaling and returning integer instead of fractional format type specifier. Note that the fixes tag is before the driver was heavily refactored to introduce spi support, so the patch won't apply that far back. Signed-off-by: Tomasz Duszynski Fixes: 14beaa8f5ab1 ("iio: pressure: bmp280: add humidity support") Acked-by: Matt Ranostay Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/bmp280-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 8f26428804a2..5f625ffa2a88 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -362,10 +362,9 @@ static int bmp280_read_humid(struct bmp280_data *data, int *val, int *val2) } comp_humidity = bmp280_compensate_humidity(data, adc_humidity); - *val = comp_humidity; - *val2 = 1024; + *val = comp_humidity * 1000 / 1024; - return IIO_VAL_FRACTIONAL; + return IIO_VAL_INT; } static int bmp280_read_raw(struct iio_dev *indio_dev, -- GitLab From fc3b4e774cec694e3f7444adb5d16c974f4430bf Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Wed, 30 May 2018 11:06:34 +0200 Subject: [PATCH 0898/1291] brcmfmac: stop watchdog before detach and free everything [ Upstream commit 373c83a801f15b1e3d02d855fad89112bd4ccbe0 ] Using built-in in kernel image without a firmware in filesystem or in the kernel image can lead to a kernel NULL pointer deference. Watchdog need to be stopped in brcmf_sdio_remove The system is going down NOW! [ 1348.110759] Unable to handle kernel NULL pointer dereference at virtual address 000002f8 Sent SIGTERM to all processes [ 1348.121412] Mem abort info: [ 1348.126962] ESR = 0x96000004 [ 1348.130023] Exception class = DABT (current EL), IL = 32 bits [ 1348.135948] SET = 0, FnV = 0 [ 1348.138997] EA = 0, S1PTW = 0 [ 1348.142154] Data abort info: [ 1348.145045] ISV = 0, ISS = 0x00000004 [ 1348.148884] CM = 0, WnR = 0 [ 1348.151861] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 1348.158475] [00000000000002f8] pgd=0000000000000000 [ 1348.163364] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 1348.168927] Modules linked in: ipv6 [ 1348.172421] CPU: 3 PID: 1421 Comm: brcmf_wdog/mmc0 Not tainted 4.17.0-rc5-next-20180517 #18 [ 1348.180757] Hardware name: Amarula A64-Relic (DT) [ 1348.185455] pstate: 60000005 (nZCv daif -PAN -UAO) [ 1348.190251] pc : brcmf_sdiod_freezer_count+0x0/0x20 [ 1348.195124] lr : brcmf_sdio_watchdog_thread+0x64/0x290 [ 1348.200253] sp : ffff00000b85be30 [ 1348.203561] x29: ffff00000b85be30 x28: 0000000000000000 [ 1348.208868] x27: ffff00000b6cb918 x26: ffff80003b990638 [ 1348.214176] x25: ffff0000087b1a20 x24: ffff80003b94f800 [ 1348.219483] x23: ffff000008e620c8 x22: ffff000008f0b660 [ 1348.224790] x21: ffff000008c6a858 x20: 00000000fffffe00 [ 1348.230097] x19: ffff80003b94f800 x18: 0000000000000001 [ 1348.235404] x17: 0000ffffab2e8a74 x16: ffff0000080d7de8 [ 1348.240711] x15: 0000000000000000 x14: 0000000000000400 [ 1348.246018] x13: 0000000000000400 x12: 0000000000000001 [ 1348.251324] x11: 00000000000002c4 x10: 0000000000000a10 [ 1348.256631] x9 : ffff00000b85bc40 x8 : ffff80003be11870 [ 1348.261937] x7 : ffff80003dfc7308 x6 : 000000078ff08b55 [ 1348.267243] x5 : 00000139e1058400 x4 : 0000000000000000 [ 1348.272550] x3 : dead000000000100 x2 : 958f2788d6618100 [ 1348.277856] x1 : 00000000fffffe00 x0 : 0000000000000000 Signed-off-by: Michael Trimarchi Acked-by: Arend van Spriel Tested-by: Andy Shevchenko Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index eccd25febfe6..4c28b04ea605 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4245,6 +4245,13 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) brcmf_dbg(TRACE, "Enter\n"); if (bus) { + /* Stop watchdog task */ + if (bus->watchdog_tsk) { + send_sig(SIGTERM, bus->watchdog_tsk, 1); + kthread_stop(bus->watchdog_tsk); + bus->watchdog_tsk = NULL; + } + /* De-register interrupt handler */ brcmf_sdiod_intr_unregister(bus->sdiodev); -- GitLab From 36bc1e1e69beb66b1f9e1ac157ec8efefb84972f Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 17 Jun 2018 13:53:09 +0200 Subject: [PATCH 0899/1291] ARM: dts: am437x: make edt-ft5x06 a wakeup source [ Upstream commit 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c ] The touchscreen driver no longer configures the device as wakeup source by default. A "wakeup-source" property is needed. Signed-off-by: Daniel Mack Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 2c6bf0684f50..094fd0ea91a0 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -535,6 +535,8 @@ edt-ft5306@38 { touchscreen-size-x = <480>; touchscreen-size-y = <272>; + + wakeup-source; }; tlv320aic3106: tlv320aic3106@1b { -- GitLab From b171162cbd941b24f5663ee853a2733b0fd9087a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jun 2018 11:13:59 +0200 Subject: [PATCH 0900/1291] ALSA: seq: Fix UBSAN warning at SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT ioctl [ Upstream commit c9a4c63888dbb79ce4d068ca1dd8b05bc3f156b1 ] The kernel may spew a WARNING with UBSAN undefined behavior at handling ALSA sequencer ioctl SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT: UBSAN: Undefined behaviour in sound/core/seq/seq_clientmgr.c:2007:14 signed integer overflow: 2147483647 + 1 cannot be represented in type 'int' Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x122/0x1c8 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x86 lib/ubsan.c:159 handle_overflow+0x1c2/0x21f lib/ubsan.c:190 __ubsan_handle_add_overflow+0x2a/0x31 lib/ubsan.c:198 snd_seq_ioctl_query_next_client+0x1ac/0x1d0 sound/core/seq/seq_clientmgr.c:2007 snd_seq_ioctl+0x264/0x3d0 sound/core/seq/seq_clientmgr.c:2144 .... It happens only when INT_MAX is passed there, as we're incrementing it unconditionally. So the fix is trivial, check the value with INT_MAX. Although the bug itself is fairly harmless, it's better to fix it so that fuzzers won't hit this again later. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200211 Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index a4c571cb3b87..350c33ec82b3 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2001,7 +2001,8 @@ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, struct snd_seq_client *cptr = NULL; /* search for next client */ - info->client++; + if (info->client < INT_MAX) + info->client++; if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { -- GitLab From 13a049bebb760d185ffc04f0a3c73efd9aec87f0 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Thu, 21 Jun 2018 16:19:43 +0300 Subject: [PATCH 0901/1291] usb: xhci: remove the code build warning [ Upstream commit 36eb93509c45d0bdbd8d09a01ab9d857972f5963 ] Initialize the 'err' variate to remove the build warning, the warning is shown as below: drivers/usb/host/xhci-tegra.c: In function 'tegra_xusb_mbox_thread': drivers/usb/host/xhci-tegra.c:552:6: warning: 'err' may be used uninitialized in this function [-Wuninitialized] drivers/usb/host/xhci-tegra.c:482:6: note: 'err' was declared here Fixes: e84fce0f8837 ("usb: xhci: Add NVIDIA Tegra XUSB controller driver") Signed-off-by: Dongjiu Geng Acked-by: Thierry Reding Acked-by: Jon Hunter Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 74436f8ca538..32ddafe7af87 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -482,7 +482,7 @@ static void tegra_xusb_mbox_handle(struct tegra_xusb *tegra, unsigned long mask; unsigned int port; bool idle, enable; - int err; + int err = 0; memset(&rsp, 0, sizeof(rsp)); -- GitLab From 8a7ac5ee6856acb2487cd657e538d55324e29d45 Mon Sep 17 00:00:00 2001 From: Ajay Gupta Date: Thu, 21 Jun 2018 16:19:45 +0300 Subject: [PATCH 0902/1291] usb: xhci: increase CRS timeout value [ Upstream commit 305886ca87be480ae159908c2affd135c04215cf ] Some controllers take almost 55ms to complete controller restore state (CRS). There is no timeout limit mentioned in xhci specification so fixing the issue by increasing the timeout limit to 100ms [reformat code comment -Mathias] Signed-off-by: Ajay Gupta Signed-off-by: Nagaraj Annaiah Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index fe84b36627ec..6b11fd9d8efe 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1024,8 +1024,13 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) command = readl(&xhci->op_regs->command); command |= CMD_CRS; writel(command, &xhci->op_regs->command); + /* + * Some controllers take up to 55+ ms to complete the controller + * restore so setting the timeout to 100ms. Xhci specification + * doesn't mention any timeout value. + */ if (xhci_handshake(&xhci->op_regs->status, - STS_RESTORE, 0, 10 * 1000)) { + STS_RESTORE, 0, 100 * 1000)) { xhci_warn(xhci, "WARN: xHC restore state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; -- GitLab From d341ce9a63dc3058293ae833107b42a5b40a6139 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 7 Jun 2018 15:54:48 +0200 Subject: [PATCH 0903/1291] NFC: pn533: Fix wrong GFP flag usage [ Upstream commit ecc443c03fb14abfb8a6af5e3b2d43b5257e60f2 ] pn533_recv_response() is an urb completion handler, so it must use GFP_ATOMIC. pn533_usb_send_frame() OTOH runs from a regular sleeping context, so the pn533_submit_urb_for_response() there (and only there) can use the regular GFP_KERNEL flags. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514134 Fixes: 9815c7cf22da ("NFC: pn533: Separate physical layer from ...") Cc: Michael Thalmeier Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn533/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index d5553c47014f..5d823e965883 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -74,7 +74,7 @@ static void pn533_recv_response(struct urb *urb) struct sk_buff *skb = NULL; if (!urb->status) { - skb = alloc_skb(urb->actual_length, GFP_KERNEL); + skb = alloc_skb(urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&phy->udev->dev, "failed to alloc memory\n"); } else { @@ -186,7 +186,7 @@ static int pn533_usb_send_frame(struct pn533 *dev, if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ - rc = pn533_submit_urb_for_response(phy, GFP_ATOMIC); + rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); if (rc) goto error; } else if (dev->protocol_type == PN533_PROTO_REQ_ACK_RESP) { -- GitLab From 5a64e0870cc1db2be77bbb95d209d7dcb2186686 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Jun 2018 16:17:14 +0300 Subject: [PATCH 0904/1291] typec: tcpm: Fix a msecs vs jiffies bug [ Upstream commit 9578bcd0bb487b8ecef4b7eee799aafb678aa441 ] The tcpm_set_state() function take msecs not jiffies. Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Signed-off-by: Dan Carpenter Acked-by: Heikki Krogerus Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c index 8af62e74d54c..f237e31926f4 100644 --- a/drivers/staging/typec/tcpm.c +++ b/drivers/staging/typec/tcpm.c @@ -2479,7 +2479,8 @@ static void run_state_machine(struct tcpm_port *port) tcpm_port_is_sink(port) && time_is_after_jiffies(port->delayed_runtime)) { tcpm_set_state(port, SNK_DISCOVERY, - port->delayed_runtime - jiffies); + jiffies_to_msecs(port->delayed_runtime - + jiffies)); break; } tcpm_set_state(port, unattached_state(port), 0); -- GitLab From 2f9e98dbccf7fe27eb6fc218d81befc5d3056ca2 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Thu, 21 Jun 2018 15:30:54 +0200 Subject: [PATCH 0905/1291] kconfig: fix line numbers for if-entries in menu tree [ Upstream commit b2d00d7c61c84edd150310af3f556f8a3c6e2e67 ] The line numers for if-entries in the menu tree are off by one or more lines which is confusing when debugging for correctness of unrelated changes. According to the git log, commit a02f0570ae201c49 (kconfig: improve error handling in the parser) was the last one that changed that part of the parser and replaced "if_entry: T_IF expr T_EOL" by "if_entry: T_IF expr nl" but the commit message does not state why this has been done. When reverting that part of the commit, only the line numers are corrected (checked with cdebug = DEBUG_PARSE in zconf.y), otherwise the menu tree remains unchanged (checked with zconfdump() enabled in conf.c). An example for the corrected line numbers: drivers/soc/Kconfig:15:source drivers/soc/tegra/Kconfig drivers/soc/tegra/Kconfig:4:if drivers/soc/tegra/Kconfig:6:if changes to: drivers/soc/Kconfig:15:source drivers/soc/tegra/Kconfig drivers/soc/tegra/Kconfig:1:if drivers/soc/tegra/Kconfig:4:if Signed-off-by: Dirk Gouders Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/zconf.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 20d9caa4be99..126e3f2e1ed7 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; static struct menu *current_menu, *current_entry; %} -%expect 32 +%expect 31 %union { @@ -345,7 +345,7 @@ choice_block: /* if entry */ -if_entry: T_IF expr nl +if_entry: T_IF expr T_EOL { printd(DEBUG_PARSE, "%s:%d:if\n", zconf_curname(), zconf_lineno()); menu_add_entry(NULL); -- GitLab From 6367516474c8589510b5ce8b2ccf68dff40ecc8b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Jun 2018 09:31:52 +0200 Subject: [PATCH 0906/1291] perf record: Support s390 random socket_id assignment [ Upstream commit 01766229533f9bdb1144a41b4345c8c7286da7b4 ] On s390 the socket identifier assigned to a CPU identifier is random and (depending on the configuration of the LPAR) may be higher than the CPU identifier. This is currently not supported. Fix this by allowing arbitrary socket identifiers being assigned to CPU id. Output before: [root@p23lp27 perf]# ./perf report --header -I -v ... socket_id number is too big.You may need to upgrade the perf tool. Error: The perf.data file has no samples! # ======== # captured on : Tue May 29 09:29:57 2018 # header version : 1 ... # Core ID and Socket ID information is not available ... [root@p23lp27 perf]# Output after: [root@p23lp27 perf]# ./perf report --header -I -v ... Error: The perf.data file has no samples! # ======== # captured on : Tue May 29 09:29:57 2018 # header version : 1 ... # CPU 0: Core ID 0, Socket ID 6 # CPU 1: Core ID 1, Socket ID 3 # CPU 2: Core ID -1, Socket ID -1 ... [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180611073153.15592-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/header.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ba0cea8fef72..8a678a3d5a2a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1824,6 +1824,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; struct perf_header *ph = ff->ph; + bool do_core_id_test = true; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) @@ -1878,6 +1879,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) return 0; } + /* On s390 the socket_id number is not related to the numbers of cpus. + * The socket_id number might be higher than the numbers of cpus. + * This depends on the configuration. + */ + if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4)) + do_core_id_test = false; + for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; @@ -1887,7 +1895,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - if (nr != (u32)-1 && nr > (u32)cpu_nr) { + if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); goto free_cpu; -- GitLab From f33194bca6c3ac011f290c884219474b84bde6c5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Jun 2018 09:31:53 +0200 Subject: [PATCH 0907/1291] perf test session topology: Fix test on s390 [ Upstream commit b930e62ecd362843002bdf84c2940439822af321 ] On s390 this test case fails because the socket identifiction numbers assigned to the CPU are higher than the CPU identification numbers. F/ix this by adding the platform architecture into the perf data header flag information. This helps identifiing the test platform and handles s390 specifics in process_cpu_topology(). Before: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-iUv755 socket_id number is too big.You may need to upgrade the perf tool. ---- end ---- Session topology: Skip [root@p23lp27 perf]# After: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-8X8VTs CPU 0, core 0, socket 6 CPU 1, core 1, socket 3 ---- end ---- Session topology: Ok [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Fixes: c84974ed9fb6 ("perf test: Add entry to test cpu topology") Link: http://lkml.kernel.org/r/20180611073153.15592-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 81ede20f49d7..4e9dad8c9763 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -43,6 +43,7 @@ static int session_write_header(char *path) perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_ARCH); session->header.data_size += DATA_SIZE; -- GitLab From b4d84dcee9efe0197d1d74bae648324850f94593 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 11 Jun 2018 16:10:49 +0530 Subject: [PATCH 0908/1291] perf report powerpc: Fix crash if callchain is empty [ Upstream commit 143c99f6ac6812d23254e80844d6e34be897d3e1 ] For some cases, the callchain provided by the kernel may be empty. So, the callchain ip filtering code will cause a crash if we do not check whether the struct ip_callchain pointer is NULL before accessing any members. This can be observed on a powerpc64le system running Fedora 27 as shown below. # perf record -b -e cycles:u ls Before: # perf report --branch-history perf: Segmentation fault -------- backtrace -------- perf[0x1027615c] linux-vdso64.so.1(__kernel_sigtramp_rt64+0x0)[0x7fff856304d8] perf(arch_skip_callchain_idx+0x44)[0x10257c58] perf[0x1017f2e4] perf(thread__resolve_callchain+0x124)[0x1017ff5c] perf(sample__resolve_callchain+0xf0)[0x10172788] ... After: # perf report --branch-history Samples: 25 of event 'cycles:u', Event count (approx.): 2306870 Overhead Source:Line Symbol Shared Object + 11.60% _init+35736 [.] _init ls + 9.84% strcoll_l.c:137 [.] __strcoll_l libc-2.26.so + 9.16% memcpy.S:175 [.] __memcpy_power7 libc-2.26.so + 9.01% gconv_charset.h:54 [.] _nl_find_locale libc-2.26.so + 8.87% dl-addr.c:52 [.] _dl_addr libc-2.26.so + 8.83% _init+236 [.] _init ls ... Reported-by: Ravi Bangoria Signed-off-by: Sandipan Das Acked-by: Ravi Bangoria Cc: Jiri Olsa Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20180611104049.11048-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 0c370f81e002..bd630c222e65 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -243,7 +243,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) u64 ip; u64 skip_slot = -1; - if (chain->nr < 3) + if (!chain || chain->nr < 3) return skip_slot; ip = chain->ips[2]; -- GitLab From 28dbfc731fec37ed88e59e6470051f19fc32aec9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 16 Jun 2018 10:47:39 -0700 Subject: [PATCH 0909/1291] perf tools: Fix a clang 7.0 compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c6555c14572aeadf4fe2819abd971c4e7608b926 ] Arnaldo reported the perf build failure with latest llvm/clang compiler (7.0). $ make LIBCLANGLLVM=1 -C tools/perf/ CC /tmp/tmp.t53Qo38zci/tests/kmod-path.o util/c++/clang.cpp: In function ‘std::unique_ptr > perf::getBPFObjectFromModule(llvm::Module*)’: util/c++/clang.cpp:150:43: error: no matching function for call to ‘llvm::TargetMachine::addPassesToEmitFile(llvm::legacy::PassManager&, llvm::raw_svector_ostream&, llvm::TargetMachine::CodeGenFileType)’ TargetMachine::CGFT_ObjectFile)) { ^ In file included from util/c++/clang.cpp:25:0: /usr/local/include/llvm/Target/TargetMachine.h:254:16: note: candidate: virtual bool llvm::TargetMachine::addPassesToEmitFile( llvm::legacy::PassManagerBase&, llvm::raw_pwrite_stream&, llvm::raw_pwrite_stream*, llvm::TargetMachine::CodeGenFileType, bool, llvm::MachineModuleInfo*) virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, ^~~~~~~~~~~~~~~~~~~ /usr/local/include/llvm/Target/TargetMachine.h:254:16: note: candidate expects 6 arguments, 3 provided mv: cannot stat '/tmp/tmp.t53Qo38zci/util/c++/.clang.o.tmp': No such file or directory make[7]: *** [/home/acme/git/perf/tools/build/Makefile.build:101: /tmp/tmp.t53Qo38zci/util/c++/clang.o] Error 1 make[6]: *** [/home/acme/git/perf/tools/build/Makefile.build:139: c++] Error 2 make[5]: *** [/home/acme/git/perf/tools/build/Makefile.build:139: util] Error 2 make[5]: *** Waiting for unfinished jobs.... CC /tmp/tmp.t53Qo38zci/tests/thread-map.o The function addPassesToEmitFile signature changed in llvm 7.0 and such a change caused the failure. This patch fixed the issue with using proper function signatures under different compiler versions. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Yonghong Song Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Wang Nan Link: http://lkml.kernel.org/r/20180616174739.1076733-1-yhs@fb.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/c++/clang.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index bf31ceab33bd..89512504551b 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module) raw_svector_ostream ostream(*Buffer); legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile)) { + bool NotAdded; +#if CLANG_VERSION_MAJOR < 7 + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile); +#else + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, + TargetMachine::CGFT_ObjectFile); +#endif + if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr>(nullptr);; } -- GitLab From 0f868ad81facb32633f528bf5674cd3db025671b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Jun 2018 11:40:36 +0200 Subject: [PATCH 0910/1291] perf bench: Fix numa report output code [ Upstream commit 983107072be1a39cbde67d45cb0059138190e015 ] Currently we can hit following assert when running numa bench: $ perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZ0cm --thp 1 perf: bench/numa.c:1577: __bench_numa: Assertion `!(!(((wait_stat) & 0x7f) == 0))' failed. The assertion is correct, because we hit the SIGFPE in following line: Thread 2.2 "thread 0/0" received signal SIGFPE, Arithmetic exception. [Switching to Thread 0x7fffd28c6700 (LWP 11750)] 0x000.. in worker_thread (__tdata=0x7.. ) at bench/numa.c:1257 1257 td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; We don't check if the runtime is actually bigger than 1 second, and thus this might end up with zero division within FPU. Adding the check to prevent this. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180620094036.17278-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/bench/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 944070e98a2c..0afcc7eccc61 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata) u8 *global_data; u8 *process_data; u8 *thread_data; - u64 bytes_done; + u64 bytes_done, secs; long work_done; u32 l; struct rusage rusage; @@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; - td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; + secs = td->runtime_ns / NSEC_PER_SEC; + td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; getrusage(RUSAGE_THREAD, &rusage); td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; -- GitLab From 1ea053d1cc8bc6e52e5ecb34cafc1e41d33d5602 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 17 May 2018 20:20:52 -0500 Subject: [PATCH 0911/1291] ARM: davinci: board-da850-evm: fix WP pin polarity for MMC/SD [ Upstream commit 1b6fe9798af8cb7d80fad5dd30ee1bcd1e0f51eb ] When booting from MMC/SD in rw mode on DA850 EVM, the system crashes because the write protect pin should be active high and not active low. This patch fixes the polarity of the WP pin. Fixes: bdf0e8364fd3 ("ARM: davinci: da850-evm: use gpio descriptor for mmc pins") Signed-off-by: Adam Ford Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-davinci/board-da850-evm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 25f12118c364..2f6ac1afa804 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -773,7 +773,7 @@ static struct gpiod_lookup_table mmc_gpios_table = { GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd", GPIO_ACTIVE_LOW), GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp", - GPIO_ACTIVE_LOW), + GPIO_ACTIVE_HIGH), }, }; -- GitLab From 4c3b0ae79f0736993c444980c40ebf7a103b6f39 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jun 2018 18:33:45 +0200 Subject: [PATCH 0912/1291] netfilter: nf_log: fix uninit read in nf_log_proc_dostring [ Upstream commit dffd22aed2aa1e804bccf19b30a421e89ee2ae61 ] When proc_dostring() is called with a non-zero offset in strict mode, it doesn't just write to the ->data buffer, it also reads. Make sure it doesn't read uninitialized data. Fixes: c6ac37d8d884 ("netfilter: nf_log: fix error on write NONE to [...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 276324abfa60..cdc744aa5889 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -440,6 +440,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, if (write) { struct ctl_table tmp = *table; + /* proc_dostring() can append to existing strings, so we need to + * initialize it as an empty string. + */ + buf[0] = '\0'; tmp.data = buf; r = proc_dostring(&tmp, write, buffer, lenp, ppos); if (r) -- GitLab From 77c967872e8efe1341696bacfa1f793aedcc9f0d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 19 Jun 2018 18:20:34 +0800 Subject: [PATCH 0913/1291] ceph: fix dentry leak in splice_dentry() [ Upstream commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3 ] In any case, d_splice_alias() does not drop reference of original dentry. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ceph/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f2550a076edc..d5124ed35154 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1087,6 +1087,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); + dput(dn); dn = realdn; /* note realdn contains the error */ goto out; } else if (realdn) { -- GitLab From a5b6bb86cc128b8bbcdd8fdccd2567c09bdc0ac4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 31 May 2018 12:24:48 +0300 Subject: [PATCH 0914/1291] net/mlx5: E-Switch, Disallow vlan/spoofcheck setup if not being esw manager [ Upstream commit a8d70a054a718b63058b3d3ac58b6181815e4289 ] In smartnic env, if the host (PF) driver is not an e-switch manager, we are not allowed to apply eswitch ports setups such as vlan (VST), spoof-checks, min/max rate or state. Make sure we are eswitch manager when coming to issue these callbacks and err otherwise. Also fix the definition of ESW_ALLOWED to rely on eswitch_manager capability and on the vport_group_manger. Operations on the VF nic vport context, such as setting a mac or reading the vport counters are allowed to the PF in this scheme. The modify nic vport guid code was modified to omit checking the nic_vport_node_guid_modify eswitch capability. The reason for doing so is that modifying node guid requires vport group manager capability, and there's no need to check further capabilities. 1. set_vf_vlan - disallowed 2. set_vf_spoofchk - disallowed 3. set_vf_mac - allowed 4. get_vf_config - allowed 5. set_vf_trust - disallowed 6. set_vf_rate - disallowed 7. get_vf_stat - allowed 8. set_vf_link_state - disallowed Fixes: f942380c1239 ('net/mlx5: E-Switch, Vport ingress/egress ACLs rules for spoofchk') Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Tested-by: Or Gerlitz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++++------- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 -- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f697084937c3..de72b66df3e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1525,17 +1525,15 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ -#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) +#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) + int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i, enabled_events; - if (!ESW_ALLOWED(esw)) - return 0; - - if (!MLX5_ESWITCH_MANAGER(esw->dev) || + if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; @@ -1728,7 +1726,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u64 node_guid; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; @@ -1805,7 +1803,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, { struct mlx5_vport *evport; - if (!ESW_ALLOWED(esw)) + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 71153c0f1605..aa9a88e84e3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -549,8 +549,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, return -EINVAL; if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return -EACCES; - if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) - return -EOPNOTSUPP; in = kvzalloc(inlen, GFP_KERNEL); if (!in) -- GitLab From 778bce908b84b7d59d09ed0b9efca4de0edfc2c2 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 26 Jun 2018 09:16:31 +0800 Subject: [PATCH 0915/1291] nfp: cast sizeof() to int when comparing with error code [ Upstream commit 2d2595719a97c876f35b1e60e5768e58753b268c ] sizeof() will return unsigned value so in the error check negative error code will be always larger than sizeof(). Fixes: a0d8e02c35ff ("nfp: add support for reading nffw info") Signed-off-by: Chengguang Xu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c index cd34097b79f1..37a6d7822a38 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c @@ -232,7 +232,7 @@ struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp) err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res), nfp_resource_address(state->res), fwinf, sizeof(*fwinf)); - if (err < sizeof(*fwinf)) + if (err < (int)sizeof(*fwinf)) goto err_release; if (!nffw_res_flg_init_get(fwinf)) -- GitLab From 90c45a36250b280f51af7fcce72ecfb3f9701f75 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:17:17 -0700 Subject: [PATCH 0916/1291] selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs [ Upstream commit ec348020566009d3da9b99f07c05814d13969c78 ] When I wrote the sigreturn test, I didn't realize that AMD's busted IRET behavior was different from Intel's busted IRET behavior: On AMD CPUs, the CPU leaks the high 32 bits of the kernel stack pointer to certain userspace contexts. Gee, thanks. There's very little the kernel can do about it. Modify the test so it passes. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/86e7fd3564497f657de30a36da4505799eebef01.1530076529.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/sigreturn.c | 46 ++++++++++++++++--------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 246145b84a12..2559e2c01793 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -612,19 +612,38 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) greg_t req = requested_regs[i], res = resulting_regs[i]; if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ - if (i == REG_SP) { - printf("\tSP: %llx -> %llx\n", (unsigned long long)req, - (unsigned long long)res); + if (i == REG_SP) { /* - * In many circumstances, the high 32 bits of rsp - * are zeroed. For example, we could be a real - * 32-bit program, or we could hit any of a number - * of poorly-documented IRET or segmented ESP - * oddities. If this happens, it's okay. + * If we were using a 16-bit stack segment, then + * the kernel is a bit stuck: IRET only restores + * the low 16 bits of ESP/RSP if SS is 16-bit. + * The kernel uses a hack to restore bits 31:16, + * but that hack doesn't help with bits 63:32. + * On Intel CPUs, bits 63:32 end up zeroed, and, on + * AMD CPUs, they leak the high bits of the kernel + * espfix64 stack pointer. There's very little that + * the kernel can do about it. + * + * Similarly, if we are returning to a 32-bit context, + * the CPU will often lose the high 32 bits of RSP. */ - if (res == (req & 0xFFFFFFFF)) - continue; /* OK; not expected to work */ + + if (res == req) + continue; + + if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { + printf("[NOTE]\tSP: %llx -> %llx\n", + (unsigned long long)req, + (unsigned long long)res); + continue; + } + + printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; } bool ignore_reg = false; @@ -663,13 +682,6 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) } if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { - /* - * SP is particularly interesting here. The - * usual cause of failures is that we hit the - * nasty IRET case of returning to a 16-bit SS, - * in which case bits 16:31 of the *kernel* - * stack pointer persist in ESP. - */ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", i, (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); -- GitLab From 9f8da50b0232adef07a58402316acb6ef0205a3e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:17:18 -0700 Subject: [PATCH 0917/1291] selftests/x86/sigreturn: Do minor cleanups [ Upstream commit e8a445dea219c32727016af14f847d2e8f7ebec8 ] We have short names for the requested and resulting register values. Use them instead of spelling out the whole register entry for each case. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bb3bc1f923a2f6fe7912d22a1068fe29d6033d38.1530076529.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/sigreturn.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 2559e2c01793..4d9dc3f2fd70 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -610,6 +610,7 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) */ for (int i = 0; i < NGREG; i++) { greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ @@ -673,18 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) #endif /* Sanity check on the kernel */ - if (i == REG_CX && requested_regs[i] != resulting_regs[i]) { + if (i == REG_CX && req != res) { printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", - (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + (unsigned long long)req, + (unsigned long long)res); nerrs++; continue; } - if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + if (req != res && !ignore_reg) { printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", - i, (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + i, (unsigned long long)req, + (unsigned long long)res); nerrs++; } } -- GitLab From 58b22343150ba5c7e3d8cbac1c320773a7ef530f Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 5 Jun 2018 15:37:51 +0530 Subject: [PATCH 0918/1291] ARM: dts: da850: Fix interrups property for gpio [ Upstream commit 3eb1b955cd7ed1e621ace856710006c2a8a7f231 ] The intc #interrupt-cells is equal to 1. Currently gpio node has 2 cells per IRQ which is wrong. Remove the additional cell for each of the interrupts. Signed-off-by: Keerthy Fixes: 2e38b946dc54 ("ARM: davinci: da850: add GPIO DT node") Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/da850.dtsi | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index 8a15f7193c82..77dd62e260db 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -518,11 +518,7 @@ gpio: gpio@226000 { gpio-controller; #gpio-cells = <2>; reg = <0x226000 0x1000>; - interrupts = <42 IRQ_TYPE_EDGE_BOTH - 43 IRQ_TYPE_EDGE_BOTH 44 IRQ_TYPE_EDGE_BOTH - 45 IRQ_TYPE_EDGE_BOTH 46 IRQ_TYPE_EDGE_BOTH - 47 IRQ_TYPE_EDGE_BOTH 48 IRQ_TYPE_EDGE_BOTH - 49 IRQ_TYPE_EDGE_BOTH 50 IRQ_TYPE_EDGE_BOTH>; + interrupts = <42 43 44 45 46 47 48 49 50>; ti,ngpio = <144>; ti,davinci-gpio-unbanked = <0>; status = "disabled"; -- GitLab From 4c68433396e1e94b0b493c6c08a33e7e4ba48547 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 23 Jun 2018 17:00:56 +0200 Subject: [PATCH 0919/1291] ARM64: dts: meson-gxl: fix Mali GPU compatible string [ Upstream commit 1c38f4afd5d40234b67635b3c608a4093be04b96 ] meson-gxl-mali.dtsi is only used on GXL SoCs. Thus it should use the GXL specific compatible string instead of the GXBB one. For now this is purely cosmetic since the (out-of-tree) lima driver for this GPU currently uses the "arm,mali-450" match instead of the SoC specific one. However, update the .dts to match the documentation since this driver behavior might change in the future. Signed-off-by: Martin Blumenstingl Acked-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi index f06cc234693b..379abc3d82fe 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi @@ -7,7 +7,7 @@ &apb { mali: gpu@c0000 { - compatible = "amlogic,meson-gxbb-mali", "arm,mali-450"; + compatible = "amlogic,meson-gxl-mali", "arm,mali-450"; reg = <0x0 0xc0000 0x0 0x40000>; interrupts = , , -- GitLab From 4cea08638e9aad931771a4a5b50dabf86910e4de Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 19 Jun 2018 15:20:50 +0200 Subject: [PATCH 0920/1291] dmaengine: pl330: report BURST residue granularity [ Upstream commit e3f329c600033f011a978a8bc4ddb1e2e94c4f4d ] The reported residue is already calculated in BURST unit granularity, so advertise this capability properly to other devices in the system. Fixes: aee4d1fac887 ("dmaengine: pl330: improve pl330_tx_status() function") Signed-off-by: Marek Szyprowski Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 7432c8894e32..d19862f4dc9a 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2923,7 +2923,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pd->src_addr_widths = PL330_DMA_BUSWIDTHS; pd->dst_addr_widths = PL330_DMA_BUSWIDTHS; pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - pd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ? 1 : PL330_MAX_BURST); -- GitLab From bb2b7243e3f7b78b3ab720b084166d43bf0f2eac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Jun 2018 14:15:47 +0300 Subject: [PATCH 0921/1291] dmaengine: k3dma: Off by one in k3_of_dma_simple_xlate() [ Upstream commit c4c2b7644cc9a41f17a8cc8904efe3f66ae4c7ed ] The d->chans[] array has d->dma_requests elements so the > should be >= here. Fixes: 8e6152bc660e ("dmaengine: Add hisilicon k3 DMA engine driver") Signed-off-by: Dan Carpenter Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/k3dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 01d2a750a621..219ae3b545db 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -787,7 +787,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct k3_dma_dev *d = ofdma->of_dma_data; unsigned int request = dma_spec->args[0]; - if (request > d->dma_requests) + if (request >= d->dma_requests) return NULL; return dma_get_slave_channel(&(d->chans[request].vc.chan)); -- GitLab From 94cac10c50c1e38a0c51be0fded73e5aa6fa9080 Mon Sep 17 00:00:00 2001 From: Ryan Hsu Date: Mon, 18 Jun 2018 17:00:04 +0300 Subject: [PATCH 0922/1291] ath10k: update the phymode along with bandwidth change request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9191fc2a431bade3dedc9ad17759495a9f82f41b ] In the case of Station connects to AP with narrower bandwidth at beginning. And later the AP changes the bandwidth to winder bandwidth, the AP will beacon with wider bandwidth IE, eg VHT20->VHT40->VHT80 or VHT40->VHT80. Since the supported BANDWIDTH will be limited by the PHYMODE, so while Station receives the bandwidth change request, it will also need to reconfigure the PHYMODE setting to firmware instead of just configuring the BANDWIDTH info, otherwise it'll trigger a firmware crash with non-support bandwidth. The issue was observed in WLAN.RM.4.4.1-00051-QCARMSWP-1, QCA6174 with below scenario: AP xxx changed bandwidth, new config is 5200 MHz, width 2 (5190/0 MHz) disconnect from AP xxx for new auth to yyy RX ReassocResp from xxx (capab=0x1111 status=0 aid=102) associated .... AP xxx changed bandwidth, new config is 5200 MHz, width 2 (5190/0 MHz) AP xxx changed bandwidth, new config is 5200 MHz, width 3 (5210/0 MHz) .... firmware register dump: [00]: 0x05030000 0x000015B3 0x00987291 0x00955B31 [04]: 0x00987291 0x00060730 0x00000004 0x00000001 [08]: 0x004089F0 0x00955A00 0x000A0B00 0x00400000 [12]: 0x00000009 0x00000000 0x00952CD0 0x00952CE6 [16]: 0x00952CC4 0x0098E25F 0x00000000 0x0091080D [20]: 0x40987291 0x0040E7A8 0x00000000 0x0041EE3C [24]: 0x809ABF05 0x0040E808 0x00000000 0xC0987291 [28]: 0x809A650C 0x0040E948 0x0041FE40 0x004345C4 [32]: 0x809A5C63 0x0040E988 0x0040E9AC 0x0042D1A8 [36]: 0x8091D252 0x0040E9A8 0x00000002 0x00000001 [40]: 0x809FDA9D 0x0040EA58 0x0043D554 0x0042D554 [44]: 0x809F8B22 0x0040EA78 0x0043D554 0x00000001 [48]: 0x80911210 0x0040EAC8 0x00000010 0x004041D0 [52]: 0x80911154 0x0040EB28 0x00400000 0x00000000 [56]: 0x8091122D 0x0040EB48 0x00000000 0x00400600 Reported-by: Rouven Czerwinski Tested-by: Timur Kristóf Signed-off-by: Ryan Hsu Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 16 ++++++++++++++-- drivers/net/wireless/ath/ath10k/wmi.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index df11bb449988..52ebed1f55a1 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5923,8 +5923,19 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) ath10k_mac_max_vht_nss(vht_mcs_mask))); if (changed & IEEE80211_RC_BW_CHANGED) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM peer bw %d\n", - sta->addr, bw); + enum wmi_phy_mode mode; + + mode = chan_to_phymode(&def); + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM peer bw %d phymode %d\n", + sta->addr, bw, mode); + + err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, + WMI_PEER_PHYMODE, mode); + if (err) { + ath10k_warn(ar, "failed to update STA %pM peer phymode %d: %d\n", + sta->addr, mode, err); + goto exit; + } err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, WMI_PEER_CHAN_WIDTH, bw); @@ -5965,6 +5976,7 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) sta->addr); } +exit: mutex_unlock(&ar->conf_mutex); } diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index bab876cf25fe..d0e05aa437e3 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6002,6 +6002,7 @@ enum wmi_peer_param { WMI_PEER_NSS = 0x5, WMI_PEER_USE_4ADDR = 0x6, WMI_PEER_DEBUG = 0xa, + WMI_PEER_PHYMODE = 0xd, WMI_PEER_DUMMY_VAR = 0xff, /* dummy parameter for STA PS workaround */ }; -- GitLab From 601c226ea6623b8fb15e33fab3608755929de4da Mon Sep 17 00:00:00 2001 From: BingJing Chang Date: Thu, 28 Jun 2018 18:40:11 +0800 Subject: [PATCH 0923/1291] md/raid10: fix that replacement cannot complete recovery after reassemble [ Upstream commit bda3153998f3eb2cafa4a6311971143628eacdbc ] During assemble, the spare marked for replacement is not checked. conf->fullsync cannot be updated to be 1. As a result, recovery will treat it as a clean array. All recovering sectors are skipped. Original device is replaced with the not-recovered spare. mdadm -C /dev/md0 -l10 -n4 -pn2 /dev/loop[0123] mdadm /dev/md0 -a /dev/loop4 mdadm /dev/md0 --replace /dev/loop0 mdadm -S /dev/md0 # stop array during recovery mdadm -A /dev/md0 /dev/loop[01234] After reassemble, you can see recovery go on, but it completes immediately. In fact, recovery is not actually processed. To solve this problem, we just add the missing logics for replacment spares. (In raid1.c or raid5.c, they have already been checked.) Reported-by: Alex Chen Reviewed-by: Alex Wu Reviewed-by: Chung-Chiang Cheng Signed-off-by: BingJing Chang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b20c23f970f4..262a0f0f8fd5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3754,6 +3754,13 @@ static int raid10_run(struct mddev *mddev) disk->rdev->saved_raid_disk < 0) conf->fullsync = 1; } + + if (disk->replacement && + !test_bit(In_sync, &disk->replacement->flags) && + disk->replacement->saved_raid_disk < 0) { + conf->fullsync = 1; + } + disk->recovery_disabled = mddev->recovery_disabled - 1; } -- GitLab From 0aa88fda693aebe6eccf538ca6ae64cf58a0c60a Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 27 Jun 2018 11:43:58 -0400 Subject: [PATCH 0924/1291] dev-dax: check_vma: ratelimit dev_info-s [ Upstream commit 5a14e91d559aee5bdb0e002e1153fd9c4338a29e ] This is easily triggered from userspace, so let's ratelimit the messages. Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dax/device.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 7b0bf825c4e7..050e299129ac 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -188,14 +188,16 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, /* prevent private mappings from being established */ if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { - dev_info(dev, "%s: %s: fail, attempted private mapping\n", + dev_info_ratelimited(dev, + "%s: %s: fail, attempted private mapping\n", current->comm, func); return -EINVAL; } mask = dax_region->align - 1; if (vma->vm_start & mask || vma->vm_end & mask) { - dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", + dev_info_ratelimited(dev, + "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", current->comm, func, vma->vm_start, vma->vm_end, mask); return -EINVAL; @@ -203,13 +205,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV && (vma->vm_flags & VM_DONTCOPY) == 0) { - dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n", + dev_info_ratelimited(dev, + "%s: %s: fail, dax range requires MADV_DONTFORK\n", current->comm, func); return -EINVAL; } if (!vma_is_dax(vma)) { - dev_info(dev, "%s: %s: fail, vma is not DAX capable\n", + dev_info_ratelimited(dev, + "%s: %s: fail, vma is not DAX capable\n", current->comm, func); return -EINVAL; } -- GitLab From 334c9cd8ba8d8c1b5dcb5a49b997aeef3db06c1d Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 24 Jun 2018 21:10:49 -0400 Subject: [PATCH 0925/1291] nl80211: relax ht operation checks for mesh [ Upstream commit 188f60ab8e787fcbb5ac9d64ede23a0070231f09 ] Commit 9757235f451c, "nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value") relaxed the range for the HT operation field in meshconf, while also adding checks requiring the non-greenfield and non-ht-sta bits to be set in certain circumstances. The latter bit is actually reserved for mesh BSSes according to Table 9-168 in 802.11-2016, so in fact it should not be set. wpa_supplicant sets these bits because the mesh and AP code share the same implementation, but authsae does not. As a result, some meshconf updates from authsae which set only the NONHT_MIXED protection bits were being rejected. In order to avoid breaking userspace by changing the rules again, simply accept the values with or without the bits set, and mask off the reserved bit to match the spec. While in here, update the 802.11-2012 reference to 802.11-2016. Fixes: 9757235f451c ("nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value") Cc: Masashi Honma Signed-off-by: Bob Copeland Reviewed-by: Masashi Honma Reviewed-by: Masashi Honma Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ea28aa505302..0cecfdaa681d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5990,7 +5990,7 @@ do { \ nl80211_check_s32); /* * Check HT operation mode based on - * IEEE 802.11 2012 8.4.2.59 HT Operation element. + * IEEE 802.11-2016 9.4.2.57 HT Operation element. */ if (tb[NL80211_MESHCONF_HT_OPMODE]) { ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); @@ -6000,22 +6000,9 @@ do { \ IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) return -EINVAL; - if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && - (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) - return -EINVAL; + /* NON_HT_STA bit is reserved, but some programs set it */ + ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; - switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) { - case IEEE80211_HT_OP_MODE_PROTECTION_NONE: - case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: - if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT) - return -EINVAL; - break; - case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: - case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: - if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) - return -EINVAL; - break; - } cfg->ht_opmode = ht_opmode; mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } -- GitLab From d4857eb755d72c9e063b17256e5479353a2bf785 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Jun 2018 09:33:39 +0200 Subject: [PATCH 0926/1291] nl80211: check nla_parse_nested() return values [ Upstream commit 95bca62fb723a121954fc7ae5473bb2c1f0d5986 ] At the very least we should check the return value if nla_parse_nested() is called with a non-NULL policy. Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0cecfdaa681d..4cd351b74e48 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10529,9 +10529,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, - info->extack); + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, + info->extack); + if (err) + goto error; + err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10780,8 +10783,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, NULL); + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); + if (err) + return err; + if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; -- GitLab From 49316955f501da5dd8eb89ee1422ff2a921ee2bd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:13 +0200 Subject: [PATCH 0927/1291] drm/exynos: gsc: Fix support for NV16/61, YUV420/YVU420 and YUV422 modes [ Upstream commit dd209ef809080ced903e7747ee3ef640c923a1d2 ] Fix following issues related to planar YUV pixel format configuration: - NV16/61 modes were incorrectly programmed as NV12/21, - YVU420 was programmed as YUV420 on source, - YVU420 and YUV422 were programmed as YUV420 on output. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 +++++++++++++++++-------- drivers/gpu/drm/exynos/regs-gsc.h | 1 + 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 0506b2b17ac1..48f913d8208c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -532,21 +532,25 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) GSC_IN_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV61: - cfg |= (GSC_IN_CHROMA_ORDER_CRCB | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV422_2P); break; case DRM_FORMAT_YUV422: cfg |= GSC_IN_YUV422_3P; break; case DRM_FORMAT_YUV420: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_IN_YUV420_3P; + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_IN_CHROMA_ORDER_CBCR | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV422_2P); break; default: dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); @@ -806,18 +810,25 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) GSC_OUT_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P); break; + case DRM_FORMAT_NV61: + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV422_2P); + break; case DRM_FORMAT_YUV422: + cfg |= GSC_OUT_YUV422_3P; + break; case DRM_FORMAT_YUV420: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_OUT_YUV420_3P; + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | - GSC_OUT_YUV420_2P); + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV422_2P); break; default: dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h index 4704a993cbb7..16b39734115c 100644 --- a/drivers/gpu/drm/exynos/regs-gsc.h +++ b/drivers/gpu/drm/exynos/regs-gsc.h @@ -138,6 +138,7 @@ #define GSC_OUT_YUV420_3P (3 << 4) #define GSC_OUT_YUV422_1P (4 << 4) #define GSC_OUT_YUV422_2P (5 << 4) +#define GSC_OUT_YUV422_3P (6 << 4) #define GSC_OUT_YUV444 (7 << 4) #define GSC_OUT_TILE_TYPE_MASK (1 << 2) #define GSC_OUT_TILE_C_16x8 (0 << 2) -- GitLab From 4091040d025b778c2b935e5dd67a58d7ba22d1a1 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:40 +0200 Subject: [PATCH 0928/1291] drm/exynos: decon5433: Fix per-plane global alpha for XRGB modes [ Upstream commit ab337fc274a1957ff0771f19e826c736253f7c39 ] Set per-plane global alpha to maximum value to get proper blending of XRGB and ARGB planes. This fixes the strange order of overlapping planes. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6be5b53c3b27..8508f14bfebf 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -352,8 +352,8 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, writel(val, ctx->addr + DECON_VIDOSDxB(win)); } - val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | - VIDOSD_Wx_ALPHA_B_F(0x0); + val = VIDOSD_Wx_ALPHA_R_F(0xff) | VIDOSD_Wx_ALPHA_G_F(0xff) | + VIDOSD_Wx_ALPHA_B_F(0xff); writel(val, ctx->addr + DECON_VIDOSDxC(win)); val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | -- GitLab From 8a9e8b683a7c8a3fb52fcaa692ffd704ba03f29d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:49 +0200 Subject: [PATCH 0929/1291] drm/exynos: decon5433: Fix WINCONx reset value [ Upstream commit 7b7aa62c05eac9789c208b946f515983a9255d8d ] The only bits that should be preserved in decon_win_set_fmt() is WINCONx_ENWIN_F. All other bits depends on the selected pixel formats and are set by the mentioned function. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 8508f14bfebf..f905c214fdd0 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -261,7 +261,7 @@ static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, unsigned long val; val = readl(ctx->addr + DECON_WINCONx(win)); - val &= ~WINCONx_BPPMODE_MASK; + val &= WINCONx_ENWIN_F; switch (fb->format->format) { case DRM_FORMAT_XRGB1555: -- GitLab From a567493430be60b2a29b37d6401992f68acce032 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Jun 2018 15:51:30 -0700 Subject: [PATCH 0930/1291] drbd: Fix drbd_request_prepare() discard handling [ Upstream commit fad2d4ef636654e926d374ef038f4cd4286661f6 ] Fix the test that verifies whether bio_op(bio) represents a discard or write zeroes operation. Compile-tested only. Cc: Philipp Reisner Cc: Lars Ellenberg Fixes: 7435e9018f91 ("drbd: zero-out partial unaligned discards on local backend") Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_req.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index de8566e55334..c72071c300bb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1244,8 +1244,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long _drbd_start_io_acct(device, req); /* process discards always from our submitter thread */ - if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) || - (bio_op(bio) & REQ_OP_DISCARD)) + if (bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD) goto queue_for_submitter_thread; if (rw == WRITE && req->private_bio && req->i.size -- GitLab From a21b37053edc1098eaae9af37549e1bdb4640942 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:58 +0200 Subject: [PATCH 0931/1291] bpf, s390: fix potential memleak when later bpf_jit_prog fails [ Upstream commit f605ce5eb26ac934fb8106d75d46a2c875a2bf23 ] If we would ever fail in the bpf_jit_prog() pass that writes the actual insns to the image after we got header via bpf_jit_binary_alloc() then we also need to make sure to free it through bpf_jit_binary_free() again when bailing out. Given we had prior bpf_jit_prog() passes to initially probe for clobbered registers, program size and to fill in addrs arrray for jump targets, this is more of a theoretical one, but at least make sure this doesn't break with future changes. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Cc: Martin Schwidefsky Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 11cd151733d4..45f1ea117128 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1403,6 +1403,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } if (bpf_jit_prog(&jit, fp)) { + bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; } -- GitLab From dfcee0c7d9cb16886f173de2199875b4c79f43cf Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:49:54 -0500 Subject: [PATCH 0932/1291] PCI: xilinx: Add missing of_node_put() [ Upstream commit 8c3f9bd851a4d3acf0a0f222d4e9e41c0cd1ea8e ] The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here after the last usage. Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP driver") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: reworked commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-xilinx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index 94e13cb8608f..29f024f0ed7f 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ -511,6 +511,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, &intx_domain_ops, port); + of_node_put(pcie_intc_node); if (!port->leg_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); return -ENODEV; -- GitLab From 7851cea48abca82c188ba76f4fbefce3fc0de340 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:50:10 -0500 Subject: [PATCH 0933/1291] PCI: xilinx-nwl: Add missing of_node_put() [ Upstream commit 342639d996f18bc0a4db2f42a84230c0a966dc94 ] The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here after the last usage. Fixes: ab597d35ef11 ("PCI: xilinx-nwl: Add support for Xilinx NWL PCIe Host Controller") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-xilinx-nwl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c index 65dea98b2643..dd527ea558d7 100644 --- a/drivers/pci/host/pcie-xilinx-nwl.c +++ b/drivers/pci/host/pcie-xilinx-nwl.c @@ -561,7 +561,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) PCI_NUM_INTX, &legacy_domain_ops, pcie); - + of_node_put(legacy_intc_node); if (!pcie->legacy_irq_domain) { dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; -- GitLab From 441712f28d691b53b5871ac9d0a631e8a8afe2f6 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:50:27 -0500 Subject: [PATCH 0934/1291] PCI: faraday: Add missing of_node_put() [ Upstream commit 3dc6ddfedc2818eaaa36842fbb049191e0c5e50f ] The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here in the error path and after the last usage. Fixes: d3c68e0a7e34 ("PCI: faraday: Add Faraday Technology FTPCI100 PCI Host Bridge driver") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-ftpci100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/host/pci-ftpci100.c b/drivers/pci/host/pci-ftpci100.c index 4ea7d2ebcc5c..4e6b21931514 100644 --- a/drivers/pci/host/pci-ftpci100.c +++ b/drivers/pci/host/pci-ftpci100.c @@ -353,11 +353,13 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p) irq = of_irq_get(intc, 0); if (irq <= 0) { dev_err(p->dev, "failed to get parent IRQ\n"); + of_node_put(intc); return irq ?: -EINVAL; } p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX, &faraday_pci_irqdomain_ops, p); + of_node_put(intc); if (!p->irqdomain) { dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n"); return -EINVAL; -- GitLab From 3baa3f9221e221fb9488929e5289b49bb8ee7d18 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 28 Jun 2018 04:52:15 -0700 Subject: [PATCH 0935/1291] bnx2x: Fix receiving tx-timeout in error or recovery state. [ Upstream commit 484c016d9392786ce5c74017c206c706f29f823d ] Driver performs the internal reload when it receives tx-timeout event from the OS. Internal reload might fail in some scenarios e.g., fatal HW issues. In such cases OS still see the link, which would result in undesirable functionalities such as re-generation of tx-timeouts. The patch addresses this issue by indicating the link-down to OS when tx-timeout is detected, and keeping the link in down state till the internal reload is successful. Please consider applying it to 'net' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 6 ++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 352beff796ae..828e2e56b75e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1529,6 +1529,7 @@ struct bnx2x { struct link_vars link_vars; u32 link_cnt; struct bnx2x_link_report_data last_reported_link; + bool force_link_down; struct mdio_if_info mdio; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 6465414dad74..8498a357d389 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1260,6 +1260,11 @@ void __bnx2x_link_report(struct bnx2x *bp) { struct bnx2x_link_report_data cur_data; + if (bp->force_link_down) { + bp->link_vars.link_up = 0; + return; + } + /* reread mf_cfg */ if (IS_PF(bp) && !CHIP_IS_E1(bp)) bnx2x_read_mf_cfg(bp); @@ -2817,6 +2822,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bp->pending_max = 0; } + bp->force_link_down = false; if (bp->port.pmf) { rc = bnx2x_initial_phy_init(bp, load_mode); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e855a271db48..bd3e3f080ebf 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10279,6 +10279,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) bp->sp_rtnl_state = 0; smp_mb(); + /* Immediately indicate link as down */ + bp->link_vars.link_up = 0; + bp->force_link_down = true; + netif_carrier_off(bp->dev); + BNX2X_ERR("Indicating link is down due to Tx-timeout\n"); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); bnx2x_nic_load(bp, LOAD_NORMAL); -- GitLab From fba7c43d9368d90022d74a4716f33de3a6934e9b Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 28 Jun 2018 15:26:50 +0300 Subject: [PATCH 0936/1291] fsl/fman: fix parser reporting bad checksum on short frames [ Upstream commit b95f6fbc8e15803a596ca5e5e21008fba29694c6 ] The FMan hardware parser needs to be configured to remove the short frame padding from the checksum calculation, otherwise short UDP and TCP frames are likely to be marked as having a bad checksum. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fman/fman_port.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 1789b206be58..495190764155 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -324,6 +324,10 @@ struct fman_port_qmi_regs { #define HWP_HXS_PHE_REPORT 0x00000800 #define HWP_HXS_PCAC_PSTAT 0x00000100 #define HWP_HXS_PCAC_PSTOP 0x00000001 +#define HWP_HXS_TCP_OFFSET 0xA +#define HWP_HXS_UDP_OFFSET 0xB +#define HWP_HXS_SH_PAD_REM 0x80000000 + struct fman_port_hwp_regs { struct { u32 ssa; /* Soft Sequence Attachment */ @@ -728,6 +732,10 @@ static void init_hwp(struct fman_port *port) iowrite32be(0xffffffff, ®s->pmda[i].lcv); } + /* Short packet padding removal from checksum calculation */ + iowrite32be(HWP_HXS_SH_PAD_REM, ®s->pmda[HWP_HXS_TCP_OFFSET].ssa); + iowrite32be(HWP_HXS_SH_PAD_REM, ®s->pmda[HWP_HXS_UDP_OFFSET].ssa); + start_port_hwp(port); } -- GitLab From f06fae1641947549fefbef01cdec1f6c314f53a2 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 28 Jun 2018 15:26:51 +0300 Subject: [PATCH 0937/1291] dpaa_eth: DPAA SGT needs to be 256B [ Upstream commit 595e802e53f24642a145cf7f3e4ac9afab4c21ec ] The DPAA HW requires that at least 256 bytes from the start of the first scatter-gather table entry are allocated and accessible. The hardware reads the maximum size the table can have in one access, thus requiring that the allocation and mapping to be done for the maximum size of 256B even if there is a smaller number of entries in the table. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 519a021c0a25..a202c50d6fc7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -125,6 +125,9 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); /* Default alignment for start of data in an Rx FD */ #define DPAA_FD_DATA_ALIGNMENT 16 +/* The DPAA requires 256 bytes reserved and mapped for the SGT */ +#define DPAA_SGT_SIZE 256 + /* Values for the L3R field of the FM Parse Results */ /* L3 Type field: First IP Present IPv4 */ @@ -1622,8 +1625,8 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) { nr_frags = skb_shinfo(skb)->nr_frags; - dma_unmap_single(dev, addr, qm_fd_get_offset(fd) + - sizeof(struct qm_sg_entry) * (1 + nr_frags), + dma_unmap_single(dev, addr, + qm_fd_get_offset(fd) + DPAA_SGT_SIZE, dma_dir); /* The sgt buffer has been allocated with netdev_alloc_frag(), @@ -1907,8 +1910,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, void *sgt_buf; /* get a page frag to store the SGTable */ - sz = SKB_DATA_ALIGN(priv->tx_headroom + - sizeof(struct qm_sg_entry) * (1 + nr_frags)); + sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE); sgt_buf = netdev_alloc_frag(sz); if (unlikely(!sgt_buf)) { netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n", @@ -1976,9 +1978,8 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, skbh = (struct sk_buff **)buffer_start; *skbh = skb; - addr = dma_map_single(dev, buffer_start, priv->tx_headroom + - sizeof(struct qm_sg_entry) * (1 + nr_frags), - dma_dir); + addr = dma_map_single(dev, buffer_start, + priv->tx_headroom + DPAA_SGT_SIZE, dma_dir); if (unlikely(dma_mapping_error(dev, addr))) { dev_err(dev, "DMA mapping failed"); err = -EINVAL; -- GitLab From c845344aa2e2d5952f5a899fe8612b647cd39f3d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 28 Jun 2018 09:56:55 -0700 Subject: [PATCH 0938/1291] acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value [ Upstream commit c1985cefd844e26bd19673a6df8d8f0b1918c2db ] cmd_rc is passed in by reference to the acpi_nfit_ctl() function and the caller expects a value returned. However, when the package is pass through via the ND_CMD_CALL command, cmd_rc is not touched. Make sure cmd_rc is always set. Fixes: aef253382266 ("libnvdimm, nfit: centralize command status translation") Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index d56822f58ab1..56f242f34fda 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -224,6 +224,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, const guid_t *guid; int rc, i; + *cmd_rc = -EINVAL; func = cmd; if (cmd == ND_CMD_CALL) { call_pkg = buf; @@ -314,6 +315,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, * If we return an error (like elsewhere) then caller wouldn't * be able to rely upon data returned to make calculation. */ + *cmd_rc = 0; return 0; } -- GitLab From 83141913ca6bbf95bdc4aa4f9d852d5c27464eac Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 1 Jul 2018 14:17:36 +0900 Subject: [PATCH 0939/1291] openrisc: entry: Fix delay slot exception detection [ Upstream commit ae15a41a641449f536578b0d9ec0e4ade130deb5 ] Originally in patch e6d20c55a4 ("openrisc: entry: Fix delay slot detection") I fixed delay slot detection, but only for QEMU. We missed that hardware delay slot detection using delay slot exception flag (DSX) was still broken. This was because QEMU set the DSX flag in both pre-exception supervision register (ESR) and supervision register (SR) register, but on real hardware the DSX flag is only set on the SR register during exceptions. Fix this by carrying the DSX flag into the SR register during exception. We also update the DSX flag read locations to read the value from the SR register not the pt_regs SR register which represents ESR. The ESR should never have the DSX flag set. In the process I updated/removed a few comments to match the current state. Including removing a comment saying that the DSX detection logic was inefficient and needed to be rewritten. I have tested this on QEMU with a patch ensuring it matches the hardware specification. Link: https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg00000.html Fixes: e6d20c55a4 ("openrisc: entry: Fix delay slot detection") Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/entry.S | 8 +------- arch/openrisc/kernel/head.S | 9 ++++++--- arch/openrisc/kernel/traps.c | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 1b7160c79646..b16e95a4e875 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -221,12 +221,6 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.addi r3,r1,0 // pt_regs /* r4 set be EXCEPTION_HANDLE */ // effective address of fault - /* - * __PHX__: TODO - * - * all this can be written much simpler. look at - * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part - */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf @@ -258,7 +252,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) #else - l.lwz r6,PT_SR(r3) // SR + l.mfspr r6,r0,SPR_SR // SR l.andi r6,r6,SPR_SR_DSX // check for delay slot exception l.sfne r6,r0 // exception happened in delay slot l.bnf 7f diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 1e87913576e3..90979acdf165 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -141,8 +141,7 @@ * r4 - EEAR exception EA * r10 - current pointing to current_thread_info struct * r12 - syscall 0, since we didn't come from syscall - * r13 - temp it actually contains new SR, not needed anymore - * r31 - handler address of the handler we'll jump to + * r30 - handler address of the handler we'll jump to * * handler has to save remaining registers to the exception * ksp frame *before* tainting them! @@ -178,6 +177,7 @@ /* r1 is KSP, r30 is __pa(KSP) */ ;\ tophys (r30,r1) ;\ l.sw PT_GPR12(r30),r12 ;\ + /* r4 use for tmp before EA */ ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ @@ -197,7 +197,10 @@ /* r12 == 1 if we come from syscall */ ;\ CLEAR_GPR(r12) ;\ /* ----- turn on MMU ----- */ ;\ - l.ori r30,r0,(EXCEPTION_SR) ;\ + /* Carry DSX into exception SR */ ;\ + l.mfspr r30,r0,SPR_SR ;\ + l.andi r30,r30,SPR_SR_DSX ;\ + l.ori r30,r30,(EXCEPTION_SR) ;\ l.mtspr r0,r30,SPR_ESR_BASE ;\ /* r30: EA address of handler */ ;\ LOAD_SYMBOL_2_GPR(r30,handler) ;\ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 8d8437169b5e..0d44e8007ad6 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -358,7 +358,7 @@ static inline int in_delay_slot(struct pt_regs *regs) return 0; } #else - return regs->sr & SPR_SR_DSX; + return mfspr(SPR_SR) & SPR_SR_DSX; #endif } -- GitLab From e793dc3d97669a7b631f06dec6019fe5adcf0ec5 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 18 Jun 2018 15:34:14 +1000 Subject: [PATCH 0940/1291] m68k: fix "bad page state" oops on ColdFire boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ecd60532e060e45c63c57ecf1c8549b1d656d34d ] Booting a ColdFire m68k core with MMU enabled causes a "bad page state" oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"): BUG: Bad page state in process sh pfn:01ce2 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 raw: 039c4000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13 Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path, so that the PG_table flag is cleared before we free the pte page. Note that I had to change the type of pte_free() to be static from extern. Otherwise you get a lot of warnings like this: ./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static pgtable_page_dtor(page); ^ And making it static is consistent with our use of this in the other m68k pgalloc definitions of pte_free(). Signed-off-by: Greg Ungerer CC: Matthew Wilcox Reviewed-by: Geert Uytterhoeven Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/mcf_pgalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 8b707c249026..12fe700632f4 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -44,6 +44,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, unsigned long address) { + pgtable_page_dtor(page); __free_page(page); } @@ -74,8 +75,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return page; } -extern inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { + pgtable_page_dtor(page); __free_page(page); } -- GitLab From 0bcba95686bef2055bc7d4521fe23ef7e24ee059 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 27 Jun 2018 17:03:45 -0500 Subject: [PATCH 0941/1291] objtool: Support GCC 8 '-fnoreorder-functions' [ Upstream commit 08b393d01c88aff27347ed2b1b354eb4db2f1532 ] Since the following commit: cd77849a69cf ("objtool: Fix GCC 8 cold subfunction detection for aliased functions") ... if the kernel is built with EXTRA_CFLAGS='-fno-reorder-functions', objtool can get stuck in an infinite loop. That flag causes the new GCC 8 cold subfunctions to be placed in .text instead of .text.unlikely. But it also has an unfortunate quirk: in the symbol table, the subfunction (e.g., nmi_panic.cold.7) is nested inside the parent (nmi_panic). That function overlap confuses objtool, and causes it to get into an infinite loop in next_insn_same_func(). Here's Allan's description of the loop: "Objtool iterates through the instructions in nmi_panic using next_insn_same_func. Once it reaches the end of nmi_panic at 0x534 it jumps to 0x528 as that's the start of nmi_panic.cold.7. However, since the instructions starting at 0x528 are still associated with nmi_panic objtool will get stuck in a loop, continually jumping back to 0x528 after reaching 0x534." Fix it by shortening the length of the parent function so that the functions no longer overlap. Reported-and-analyzed-by: Allan Xavier Signed-off-by: Josh Poimboeuf Cc: Allan Xavier Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/9e704c52bee651129b036be14feda317ae5606ae.1530136978.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/objtool/elf.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e60e105583e..0d1acb704f64 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -302,19 +302,34 @@ static int read_symbols(struct elf *elf) continue; sym->pfunc = sym->cfunc = sym; coldstr = strstr(sym->name, ".cold."); - if (coldstr) { - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; - - if (!pfunc) { - WARN("%s(): can't find parent function", - sym->name); - goto err; - } - - sym->pfunc = pfunc; - pfunc->cfunc = sym; + if (!coldstr) + continue; + + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + + /* + * Unfortunately, -fnoreorder-functions puts the child + * inside the parent. Remove the overlap so we can + * have sane assumptions. + * + * Note that pfunc->len now no longer matches + * pfunc->sym.st_size. + */ + if (sym->sec == pfunc->sec && + sym->offset >= pfunc->offset && + sym->offset + sym->len == pfunc->offset + pfunc->len) { + pfunc->len -= sym->len; } } } -- GitLab From f3ef33ee85c17b16f5834d2dadc1df0e79ce0841 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 1 Jul 2018 16:21:21 +0800 Subject: [PATCH 0942/1291] ipvlan: call dev_change_flags when ipvlan mode is reset [ Upstream commit 5dc2d3996a8b221c20dd0900bdad45031a572530 ] After we change the ipvlan mode from l3 to l2, or vice versa, we only reset IFF_NOARP flag, but don't flush the ARP table cache, which will cause eth->h_dest to be equal to eth->h_source in ipvlan_xmit_mode_l2(). Then the message will not come out of host. Here is the reproducer on local host: ip link set eth1 up ip addr add 192.168.1.1/24 dev eth1 ip link add link eth1 ipvlan1 type ipvlan mode l3 ip netns add net1 ip link set ipvlan1 netns net1 ip netns exec net1 ip link set ipvlan1 up ip netns exec net1 ip addr add 192.168.2.1/24 dev ipvlan1 ip route add 192.168.2.0/24 via 192.168.1.2 ping 192.168.2.2 -c 2 ip netns exec net1 ip link set ipvlan1 type ipvlan mode l2 ping 192.168.2.2 -c 2 Add the same configuration on remote host. After we set the mode to l2, we could find that the src/dst MAC addresses are the same on eth1: 21:26:06.648565 00:b7:13:ad:d3:05 > 00:b7:13:ad:d3:05, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 58356, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.2.1 > 192.168.2.2: ICMP echo request, id 22686, seq 1, length 64 Fix this by calling dev_change_flags(), which will call netdevice notifier with flag change info. v2: a) As pointed out by Wang Cong, check return value for dev_change_flags() when change dev flags. b) As suggested by Stefano and Sabrina, move flags setting before l3mdev_ops. So we don't need to redo ipvlan_{, un}register_nf_hook() again in err path. Reported-by: Jianlin Shi Reviewed-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Fixes: 2ad7bf3638411 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index e7f7a1a002ee..58133c9f701b 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -73,10 +73,23 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; struct net_device *mdev = port->dev; - int err = 0; + unsigned int flags; + int err; ASSERT_RTNL(); if (port->mode != nval) { + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + flags = ipvlan->dev->flags; + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { + err = dev_change_flags(ipvlan->dev, + flags | IFF_NOARP); + } else { + err = dev_change_flags(ipvlan->dev, + flags & ~IFF_NOARP); + } + if (unlikely(err)) + goto fail; + } if (nval == IPVLAN_MODE_L3S) { /* New mode is L3S */ err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); @@ -84,21 +97,28 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) mdev->l3mdev_ops = &ipvl_l3mdev_ops; mdev->priv_flags |= IFF_L3MDEV_MASTER; } else - return err; + goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ mdev->priv_flags &= ~IFF_L3MDEV_MASTER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } - list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) - ipvlan->dev->flags |= IFF_NOARP; - else - ipvlan->dev->flags &= ~IFF_NOARP; - } port->mode = nval; } + return 0; + +fail: + /* Undo the flags changes that have been done so far. */ + list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { + flags = ipvlan->dev->flags; + if (port->mode == IPVLAN_MODE_L3 || + port->mode == IPVLAN_MODE_L3S) + dev_change_flags(ipvlan->dev, flags | IFF_NOARP); + else + dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP); + } + return err; } -- GitLab From bf6c97326381890d4c778e6223714a29b17f406b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jun 2018 12:38:53 -0500 Subject: [PATCH 0943/1291] drm/amdgpu: fix swapped emit_ib_size in vce3 [ Upstream commit 0859df22ab7cfb3ad2df2caed76cadce6ac33a80 ] The phys and vm versions had the values swapped. Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 5183b46563f6..242dfb1433d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -899,7 +899,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .emit_frame_size = 4 + /* vce_v3_0_emit_pipeline_sync */ 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */ - .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ + .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ .emit_ib = amdgpu_vce_ring_emit_ib, .emit_fence = amdgpu_vce_ring_emit_fence, .test_ring = amdgpu_vce_ring_test_ring, @@ -923,7 +923,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ 6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */ - .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ + .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ .emit_ib = vce_v3_0_ring_emit_ib, .emit_vm_flush = vce_v3_0_emit_vm_flush, .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync, -- GitLab From 979c7c0dd75da54f6e336536c8e3cc4717ec67d7 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 2 Jul 2018 23:49:54 -0700 Subject: [PATCH 0944/1291] x86/mm/32: Initialize the CR4 shadow before __flush_tlb_all() [ Upstream commit 4fb5f58e8d191f7c81637ad81284e4848afb4244 ] On 32-bit kernels, __flush_tlb_all() may have read the CR4 shadow before the initialization of CR4 shadow in cpu_init(). Fix it by adding an explicit cr4_init_shadow() call into start_secondary() which is the first function called on non-boot SMP CPUs - ahead of the __flush_tlb_all() call. ( This is somewhat of a layering violation, but start_secondary() does CR4 bootstrap in the PCID case anyway. ) Signed-off-by: Zhenzhong Duan Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/b07b6ae9-4b57-4b40-b9bc-50c2c67f1d91@default Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/smpboot.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5ebb0dbcf4f7..30447d210f37 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -223,6 +223,11 @@ static void notrace start_secondary(void *unused) #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); -- GitLab From 2aebd462a5fd0ae1bf0f08f3b01e48066c5489d8 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 26 Jun 2018 09:58:02 -0700 Subject: [PATCH 0945/1291] HID: wacom: Correct touch maximum XY of 2nd-gen Intuos [ Upstream commit 3b8d573586d1b9dee33edf6cb6f2ca05f4bca568 ] The touch sensors on the 2nd-gen Intuos tablets don't use a 4096x4096 sensor like other similar tablets (3rd-gen Bamboo, Intuos5, etc.). The incorrect maximum XY values don't normally affect userspace since touch input from these devices is typically relative rather than absolute. It does, however, cause problems when absolute distances need to be measured, e.g. for gesture recognition. Since the resolution of the touch sensor on these devices is 10 units / mm (versus 100 for the pen sensor), the proper maximum values can be calculated by simply dividing by 10. Fixes: b5fd2a3e92 ("Input: wacom - add support for three new Intuos devices") Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index c401b5b63f4c..4c72e68637c2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3212,8 +3212,14 @@ void wacom_setup_device_quirks(struct wacom *wacom) if (features->type >= INTUOSHT && features->type <= BAMBOO_PT) features->device_type |= WACOM_DEVICETYPE_PAD; - features->x_max = 4096; - features->y_max = 4096; + if (features->type == INTUOSHT2) { + features->x_max = features->x_max / 10; + features->y_max = features->y_max / 10; + } + else { + features->x_max = 4096; + features->y_max = 4096; + } } else if (features->pktlen == WACOM_PKGLEN_BBTOUCH) { features->device_type |= WACOM_DEVICETYPE_PAD; -- GitLab From 2059e527a659cf16d6bb709f1c8509f7a7623fc4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 25 Jun 2018 09:34:03 -0300 Subject: [PATCH 0946/1291] ARM: imx_v6_v7_defconfig: Select ULPI support [ Upstream commit 157bcc06094c3c5800d3f4676527047b79b618e7 ] Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like imx51-babbge. This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit 03e6275ae381 ("usb: chipidea: Fix ULPI on imx51"). Suggested-by: Andrey Smirnov Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v6_v7_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 32acac9ab81a..21ac9f02407e 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -289,6 +289,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_FTDI_SIO=m @@ -325,6 +326,7 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FUNCTIONFS=m CONFIG_USB_MASS_STORAGE=m CONFIG_USB_G_SERIAL=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -- GitLab From fa97c916277941e139d08191ccdb2152534ee648 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 26 Jun 2018 08:37:09 -0300 Subject: [PATCH 0947/1291] ARM: imx_v4_v5_defconfig: Select ULPI support [ Upstream commit 2ceb2780b790b74bc408a949f6aedbad8afa693e ] Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like that use ULPI interface. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v4_v5_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index ca0f13cafe38..6e5a3d9c23e1 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -144,9 +144,11 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_NOP_USB_XCEIV=y CONFIG_USB_GADGET=y CONFIG_USB_ETH=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -- GitLab From 56f059c87bc6ee42f07b5dfd633daf60e98f2679 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Fri, 29 Jun 2018 14:48:20 +0200 Subject: [PATCH 0948/1291] bpf: hash map: decrement counter on error [ Upstream commit ed2b82c03dc187018307c7c6bf9299705f3db383 ] Decrement the number of elements in the map in case the allocation of a new node fails. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Mauricio Vasquez B Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/hashtab.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 6533f08d1238..3d0ecc273cc6 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -730,13 +730,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, * old element will be freed immediately. * Otherwise return an error */ - atomic_dec(&htab->count); - return ERR_PTR(-E2BIG); + l_new = ERR_PTR(-E2BIG); + goto dec_count; } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) - return ERR_PTR(-ENOMEM); + if (!l_new) { + l_new = ERR_PTR(-ENOMEM); + goto dec_count; + } } memcpy(l_new->key, key, key_size); @@ -749,7 +751,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, GFP_ATOMIC | __GFP_NOWARN); if (!pptr) { kfree(l_new); - return ERR_PTR(-ENOMEM); + l_new = ERR_PTR(-ENOMEM); + goto dec_count; } } @@ -763,6 +766,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new->hash = hash; return l_new; +dec_count: + atomic_dec(&htab->count); + return l_new; } static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, -- GitLab From 017fe62bb794a2d6f339b4d76ae58ac2d0349228 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 8 Mar 2018 21:58:43 +0100 Subject: [PATCH 0949/1291] tracing: Use __printf markup to silence compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 26b68dd2f48fe7699a89f0cfbb9f4a650dc1c837 ] Silence warnings (triggered at W=1) by adding relevant __printf attributes. CC kernel/trace/trace.o kernel/trace/trace.c: In function ‘__trace_array_vprintk’: kernel/trace/trace.c:2979:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); ^~~ AR kernel/trace/built-in.o Link: http://lkml.kernel.org/r/20180308205843.27447-1-malat@debian.org Signed-off-by: Mathieu Malaterre Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 20919489883f..fbc75c84076e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2958,6 +2958,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vbprintk); +__printf(3, 0) static int __trace_array_vprintk(struct ring_buffer *buffer, unsigned long ip, const char *fmt, va_list args) @@ -3012,12 +3013,14 @@ __trace_array_vprintk(struct ring_buffer *buffer, return len; } +__printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); } +__printf(3, 0) int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { @@ -3033,6 +3036,7 @@ int trace_array_printk(struct trace_array *tr, return ret; } +__printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, unsigned long ip, const char *fmt, ...) { @@ -3048,6 +3052,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer, return ret; } +__printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(&global_trace, ip, fmt, args); -- GitLab From 6e7084e2c3f9fe203eeab3b0f053b99f830afd71 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 3 Jul 2018 17:02:46 -0700 Subject: [PATCH 0950/1291] kasan: fix shadow_size calculation error in kasan_module_alloc [ Upstream commit 1e8e18f694a52d703665012ca486826f64bac29d ] There is a special case that the size is "(N << KASAN_SHADOW_SCALE_SHIFT) Pages plus X", the value of X is [1, KASAN_SHADOW_SCALE_SIZE-1]. The operation "size >> KASAN_SHADOW_SCALE_SHIFT" will drop X, and the roundup operation can not retrieve the missed one page. For example: size=0x28006, PAGE_SIZE=0x1000, KASAN_SHADOW_SCALE_SHIFT=3, we will get shadow_size=0x5000, but actually we need 6 pages. shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE); This can lead to a kernel crash when kasan is enabled and the value of mod->core_layout.size or mod->init_layout.size is like above. Because the shadow memory of X has not been allocated and mapped. move_module: ptr = module_alloc(mod->core_layout.size); ... memset(ptr, 0, mod->core_layout.size); //crashed Unable to handle kernel paging request at virtual address ffff0fffff97b000 ...... Call trace: __asan_storeN+0x174/0x1a8 memset+0x24/0x48 layout_and_allocate+0xcd8/0x1800 load_module+0x190/0x23e8 SyS_finit_module+0x148/0x180 Link: http://lkml.kernel.org/r/1529659626-12660-1-git-send-email-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Reviewed-by: Dmitriy Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Hanjun Guo Cc: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/kasan/kasan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d90f29a166d8..71a4319256b6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -613,12 +613,13 @@ void kasan_kfree_large(const void *ptr) int kasan_module_alloc(void *addr, size_t size) { void *ret; + size_t scaled_size; size_t shadow_size; unsigned long shadow_start; shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, - PAGE_SIZE); + scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; + shadow_size = round_up(scaled_size, PAGE_SIZE); if (WARN_ON(!PAGE_ALIGNED(shadow_start))) return -EINVAL; -- GitLab From 5996929435a8418dc1e633d44c13e83383c019d5 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Tue, 3 Jul 2018 11:21:46 -0400 Subject: [PATCH 0951/1291] smsc75xx: Add workaround for gigabit link up hardware errata. [ Upstream commit d461e3da905332189aad546b2ad9adbe6071c7cc ] In certain conditions, the device may not be able to link in gigabit mode. This software workaround ensures that the device will not enter the failure state. Fixes: d0cad871703b898a442e4049c532ec39168e5b57 ("SMSC75XX USB 2.0 Gigabit Ethernet Devices") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 7a6a1fe79309..05553d252446 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -82,6 +82,9 @@ static bool turbo_mode = true; module_param(turbo_mode, bool, 0644); MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); +static int smsc75xx_link_ok_nopm(struct usbnet *dev); +static int smsc75xx_phy_gig_workaround(struct usbnet *dev); + static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, u32 *data, int in_pm) { @@ -852,6 +855,9 @@ static int smsc75xx_phy_initialize(struct usbnet *dev) return -EIO; } + /* phy workaround for gig link */ + smsc75xx_phy_gig_workaround(dev); + smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); @@ -987,6 +993,62 @@ static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) return -EIO; } +static int smsc75xx_phy_gig_workaround(struct usbnet *dev) +{ + struct mii_if_info *mii = &dev->mii; + int ret = 0, timeout = 0; + u32 buf, link_up = 0; + + /* Set the phy in Gig loopback */ + smsc75xx_mdio_write(dev->net, mii->phy_id, MII_BMCR, 0x4040); + + /* Wait for the link up */ + do { + link_up = smsc75xx_link_ok_nopm(dev); + usleep_range(10000, 20000); + timeout++; + } while ((!link_up) && (timeout < 1000)); + + if (timeout >= 1000) { + netdev_warn(dev->net, "Timeout waiting for PHY link up\n"); + return -EIO; + } + + /* phy reset */ + ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); + return ret; + } + + buf |= PMT_CTL_PHY_RST; + + ret = smsc75xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to write PMT_CTL: %d\n", ret); + return ret; + } + + timeout = 0; + do { + usleep_range(10000, 20000); + ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", + ret); + return ret; + } + timeout++; + } while ((buf & PMT_CTL_PHY_RST) && (timeout < 100)); + + if (timeout >= 100) { + netdev_warn(dev->net, "timeout waiting for PHY Reset\n"); + return -EIO; + } + + return 0; +} + static int smsc75xx_reset(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); -- GitLab From bf030b2faacbc740170e33e1281ef2f96decc2a7 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Thu, 28 Jun 2018 18:44:02 +0200 Subject: [PATCH 0952/1291] drm/bridge/sii8620: Fix display of packed pixel modes [ Upstream commit fdddc65ab35d575b42aab411b2dc687601eab680 ] Current implementation does not guarantee packed pixel modes working with every dongle. There are some dongles, which require selecting the output mode explicitly. Write proper values to registers in packed_pixel mode, based on how it is done in vendor's code. Select output color space: RGB (no packed pixel) or YCBCR422 (packed pixel). This reverts commit e8b92efa629dac0e70ea4145c5e70616de5f89c8 ("drm/bridge/sii8620: fix display of packed pixel modes in MHL2"). Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-3-git-send-email-m.purski@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/sil-sii8620.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 33bd5d80aa51..0cb69ee94ac1 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1038,21 +1038,36 @@ static void sii8620_stop_video(struct sii8620 *ctx) static void sii8620_set_format(struct sii8620 *ctx) { + u8 out_fmt; + if (sii8620_is_mhl3(ctx)) { sii8620_setbits(ctx, REG_M3_P0CTRL, BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED, ctx->use_packed_pixel ? ~0 : 0); } else { + if (ctx->use_packed_pixel) { + sii8620_write_seq_static(ctx, + REG_VID_MODE, BIT_VID_MODE_M1080P, + REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1, + REG_MHLTX_CTL6, 0x60 + ); + } else { sii8620_write_seq_static(ctx, REG_VID_MODE, 0, REG_MHL_TOP_CTL, 1, REG_MHLTX_CTL6, 0xa0 ); + } } + if (ctx->use_packed_pixel) + out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL); + else + out_fmt = VAL_TPI_FORMAT(RGB, FULL); + sii8620_write_seq(ctx, REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL), - REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL), + REG_TPI_OUTPUT, out_fmt, ); } -- GitLab From 2b533daef882f7ac6faa46e6e0e47589df1b8e7f Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Wed, 4 Jul 2018 22:36:36 +0900 Subject: [PATCH 0953/1291] samples/bpf: add missing [ Upstream commit 4d5d33a085335ef469c9a87792bcaaaa8e64d8c4 ] This fixes build error regarding redefinition: CLANG-bpf samples/bpf/parse_varlen.o samples/bpf/parse_varlen.c:111:8: error: redefinition of 'vlan_hdr' struct vlan_hdr { ^ ./include/linux/if_vlan.h:38:8: note: previous definition is here So remove duplicate 'struct vlan_hdr' in sample code and include if_vlan.h Signed-off-by: Taeung Song Acked-by: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- samples/bpf/parse_varlen.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c index 95c16324760c..0b6f22feb2c9 100644 --- a/samples/bpf/parse_varlen.c +++ b/samples/bpf/parse_varlen.c @@ -6,6 +6,7 @@ */ #define KBUILD_MODNAME "foo" #include +#include #include #include #include @@ -108,11 +109,6 @@ static int parse_ipv6(void *data, uint64_t nh_off, void *data_end) return 0; } -struct vlan_hdr { - uint16_t h_vlan_TCI; - uint16_t h_vlan_encapsulated_proto; -}; - SEC("varlen") int handle_ingress(struct __sk_buff *skb) { -- GitLab From 3bbb0484a7317707243306352ad5457f64104d68 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Wed, 4 Jul 2018 22:36:37 +0900 Subject: [PATCH 0954/1291] samples/bpf: Check the result of system() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 492b7e894587c151be681f86d4d1d086375f7b45 ] To avoid the below build warning message, use new generate_load() checking the return value. ignoring return value of ‘system’, declared with attribute warn_unused_result And it also refactors the duplicate code of both test_perf_event_all_cpu() and test_perf_event_task() Cc: Teng Qin Signed-off-by: Taeung Song Acked-by: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- samples/bpf/trace_event_user.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 7bd827b84a67..c7d525e5696e 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -121,6 +121,16 @@ static void print_stacks(void) } } +static inline int generate_load(void) +{ + if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) { + printf("failed to generate some load with dd: %s\n", strerror(errno)); + return -1; + } + + return 0; +} + static void test_perf_event_all_cpu(struct perf_event_attr *attr) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); @@ -138,7 +148,11 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); } - system("dd if=/dev/zero of=/dev/null count=5000k status=none"); + + if (generate_load() < 0) { + error = 1; + goto all_cpu_err; + } print_stacks(); all_cpu_err: for (i--; i >= 0; i--) { @@ -152,7 +166,7 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) static void test_perf_event_task(struct perf_event_attr *attr) { - int pmu_fd; + int pmu_fd, error = 0; /* open task bound event */ pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); @@ -162,10 +176,17 @@ static void test_perf_event_task(struct perf_event_attr *attr) } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); - system("dd if=/dev/zero of=/dev/null count=5000k status=none"); + + if (generate_load() < 0) { + error = 1; + goto err; + } print_stacks(); +err: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); close(pmu_fd); + if (error) + int_exit(0); } static void test_bpf_perf_event(void) -- GitLab From f599525df7f1b14bed0da75cd6065cb1742a74ff Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Wed, 4 Jul 2018 22:36:38 +0900 Subject: [PATCH 0955/1291] samples/bpf: Check the error of write() and read() [ Upstream commit 02a2f000a3629274bfad60bfc4de9edec49e63e7 ] test_task_rename() and test_urandom_read() can be failed during write() and read(), So check the result of them. Reviewed-by: David Laight Signed-off-by: Taeung Song Acked-by: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- samples/bpf/test_overhead_user.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index d291167fd3c7..7dad9a3168e1 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -6,6 +6,7 @@ */ #define _GNU_SOURCE #include +#include #include #include #include @@ -44,8 +45,13 @@ static void test_task_rename(int cpu) exit(1); } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - write(fd, buf, sizeof(buf)); + for (i = 0; i < MAX_CNT; i++) { + if (write(fd, buf, sizeof(buf)) < 0) { + printf("task rename failed: %s\n", strerror(errno)); + close(fd); + return; + } + } printf("task_rename:%d: %lld events per sec\n", cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); close(fd); @@ -63,8 +69,13 @@ static void test_urandom_read(int cpu) exit(1); } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - read(fd, buf, sizeof(buf)); + for (i = 0; i < MAX_CNT; i++) { + if (read(fd, buf, sizeof(buf)) < 0) { + printf("failed to read from /dev/urandom: %s\n", strerror(errno)); + close(fd); + return; + } + } printf("urandom_read:%d: %lld events per sec\n", cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); close(fd); -- GitLab From 2d0da218864349100c33ed71336a7211e37d64a8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 2 Jul 2018 11:21:47 +0200 Subject: [PATCH 0956/1291] ieee802154: 6lowpan: set IFLA_LINK [ Upstream commit b30c122c0bbb0a1dc413085e177ea09467e65fdb ] Otherwise NetworkManager (and iproute alike) is not able to identify the parent IEEE 802.15.4 interface of a 6LoWPAN link. Signed-off-by: Lubomir Rintel Acked-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index e9f0489e4229..22db273f15ea 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -90,12 +90,18 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) return 0; } +static int lowpan_get_iflink(const struct net_device *dev) +{ + return lowpan_802154_dev(dev)->wdev->ifindex; +} + static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, .ndo_neigh_construct = lowpan_neigh_construct, + .ndo_get_iflink = lowpan_get_iflink, }; static void lowpan_setup(struct net_device *ldev) -- GitLab From 365279b70bf3fd458cfe1d44d0f2c220cff5c362 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jul 2018 20:25:32 +0200 Subject: [PATCH 0957/1291] netfilter: x_tables: set module owner for icmp(6) matches [ Upstream commit d376bef9c29b3c65aeee4e785fffcd97ef0a9a81 ] nft_compat relies on xt_request_find_match to increment refcount of the module that provides the match/target. The (builtin) icmp matches did't set the module owner so it was possible to rmmod ip(6)tables while icmp extensions were still in use. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 114d4bef1bec..0d1a2cda1bfb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1894,6 +1894,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = { .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, + .me = THIS_MODULE, }, }; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e51e0156903..90f5bf2502a7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1907,6 +1907,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = { .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, + .me = THIS_MODULE, }, }; -- GitLab From 8e39e96f23d8fe5a6114e43ff80e5852ab11e3c9 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 4 Jul 2018 09:58:05 -0400 Subject: [PATCH 0958/1291] ipv6: make ipv6_renew_options() interrupt/kernel safe [ Upstream commit a9ba23d48dbc6ffd08426bb10f05720e0b9f5c14 ] At present the ipv6_renew_options_kern() function ends up calling into access_ok() which is problematic if done from inside an interrupt as access_ok() calls WARN_ON_IN_IRQ() on some (all?) architectures (x86-64 is affected). Example warning/backtrace is shown below: WARNING: CPU: 1 PID: 3144 at lib/usercopy.c:11 _copy_from_user+0x85/0x90 ... Call Trace: ipv6_renew_option+0xb2/0xf0 ipv6_renew_options+0x26a/0x340 ipv6_renew_options_kern+0x2c/0x40 calipso_req_setattr+0x72/0xe0 netlbl_req_setattr+0x126/0x1b0 selinux_netlbl_inet_conn_request+0x80/0x100 selinux_inet_conn_request+0x6d/0xb0 security_inet_conn_request+0x32/0x50 tcp_conn_request+0x35f/0xe00 ? __lock_acquire+0x250/0x16c0 ? selinux_socket_sock_rcv_skb+0x1ae/0x210 ? tcp_rcv_state_process+0x289/0x106b tcp_rcv_state_process+0x289/0x106b ? tcp_v6_do_rcv+0x1a7/0x3c0 tcp_v6_do_rcv+0x1a7/0x3c0 tcp_v6_rcv+0xc82/0xcf0 ip6_input_finish+0x10d/0x690 ip6_input+0x45/0x1e0 ? ip6_rcv_finish+0x1d0/0x1d0 ipv6_rcv+0x32b/0x880 ? ip6_make_skb+0x1e0/0x1e0 __netif_receive_skb_core+0x6f2/0xdf0 ? process_backlog+0x85/0x250 ? process_backlog+0x85/0x250 ? process_backlog+0xec/0x250 process_backlog+0xec/0x250 net_rx_action+0x153/0x480 __do_softirq+0xd9/0x4f7 do_softirq_own_stack+0x2a/0x40 ... While not present in the backtrace, ipv6_renew_option() ends up calling access_ok() via the following chain: access_ok() _copy_from_user() copy_from_user() ipv6_renew_option() The fix presented in this patch is to perform the userspace copy earlier in the call chain such that it is only called when the option data is actually coming from userspace; that place is do_ipv6_setsockopt(). Not only does this solve the problem seen in the backtrace above, it also allows us to simplify the code quite a bit by removing ipv6_renew_options_kern() completely. We also take this opportunity to cleanup ipv6_renew_options()/ipv6_renew_option() a small amount as well. This patch is heavily based on a rough patch by Al Viro. I've taken his original patch, converted a kmemdup() call in do_ipv6_setsockopt() to a memdup_user() call, made better use of the e_inval jump target in the same function, and cleaned up the use ipv6_renew_option() by ipv6_renew_options(). CC: Al Viro Signed-off-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 9 +--- net/ipv6/calipso.c | 9 ++-- net/ipv6/exthdrs.c | 111 +++++++++++---------------------------- net/ipv6/ipv6_sockglue.c | 27 +++++++--- 4 files changed, 53 insertions(+), 103 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e59f385da38e..f280c61e019a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -313,14 +313,7 @@ struct ipv6_txoptions *ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, - struct ipv6_opt_hdr __user *newopt, - int newoptlen); -struct ipv6_txoptions * -ipv6_renew_options_kern(struct sock *sk, - struct ipv6_txoptions *opt, - int newtype, - struct ipv6_opt_hdr *newopt, - int newoptlen); + struct ipv6_opt_hdr *newopt); struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 1323b9679cf7..1c0bb9fb76e6 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop) { struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts; - txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS, - hop, hop ? ipv6_optlen(hop) : 0); + txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop); txopt_put(old); if (IS_ERR(txopts)) return PTR_ERR(txopts); @@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req, if (IS_ERR(new)) return PTR_ERR(new); - txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, - new, new ? ipv6_optlen(new) : 0); + txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); kfree(new); @@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req) if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new)) return; /* Nothing to do */ - txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, - new, new ? ipv6_optlen(new) : 0); + txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); if (!IS_ERR(txopts)) { txopts = xchg(&req_inet->ipv6_opt, txopts); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index d6189c2a35e4..47a5f8f88c70 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -987,29 +987,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } EXPORT_SYMBOL_GPL(ipv6_dup_options); -static int ipv6_renew_option(void *ohdr, - struct ipv6_opt_hdr __user *newopt, int newoptlen, - int inherit, - struct ipv6_opt_hdr **hdr, - char **p) +static void ipv6_renew_option(int renewtype, + struct ipv6_opt_hdr **dest, + struct ipv6_opt_hdr *old, + struct ipv6_opt_hdr *new, + int newtype, char **p) { - if (inherit) { - if (ohdr) { - memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); - *hdr = (struct ipv6_opt_hdr *)*p; - *p += CMSG_ALIGN(ipv6_optlen(*hdr)); - } - } else { - if (newopt) { - if (copy_from_user(*p, newopt, newoptlen)) - return -EFAULT; - *hdr = (struct ipv6_opt_hdr *)*p; - if (ipv6_optlen(*hdr) > newoptlen) - return -EINVAL; - *p += CMSG_ALIGN(newoptlen); - } - } - return 0; + struct ipv6_opt_hdr *src; + + src = (renewtype == newtype ? new : old); + if (!src) + return; + + memcpy(*p, src, ipv6_optlen(src)); + *dest = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*dest)); } /** @@ -1035,13 +1027,11 @@ static int ipv6_renew_option(void *ohdr, */ struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, - int newtype, - struct ipv6_opt_hdr __user *newopt, int newoptlen) + int newtype, struct ipv6_opt_hdr *newopt) { int tot_len = 0; char *p; struct ipv6_txoptions *opt2; - int err; if (opt) { if (newtype != IPV6_HOPOPTS && opt->hopopt) @@ -1054,8 +1044,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); } - if (newopt && newoptlen) - tot_len += CMSG_ALIGN(newoptlen); + if (newopt) + tot_len += CMSG_ALIGN(ipv6_optlen(newopt)); if (!tot_len) return NULL; @@ -1070,29 +1060,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, opt2->tot_len = tot_len; p = (char *)(opt2 + 1); - err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen, - newtype != IPV6_HOPOPTS, - &opt2->hopopt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen, - newtype != IPV6_RTHDRDSTOPTS, - &opt2->dst0opt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen, - newtype != IPV6_RTHDR, - (struct ipv6_opt_hdr **)&opt2->srcrt, &p); - if (err) - goto out; - - err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen, - newtype != IPV6_DSTOPTS, - &opt2->dst1opt, &p); - if (err) - goto out; + ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt, + (opt ? opt->hopopt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt, + (opt ? opt->dst0opt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_RTHDR, + (struct ipv6_opt_hdr **)&opt2->srcrt, + (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL), + newopt, newtype, &p); + ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt, + (opt ? opt->dst1opt : NULL), + newopt, newtype, &p); opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + @@ -1100,37 +1080,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); return opt2; -out: - sock_kfree_s(sk, opt2, opt2->tot_len); - return ERR_PTR(err); -} - -/** - * ipv6_renew_options_kern - replace a specific ext hdr with a new one. - * - * @sk: sock from which to allocate memory - * @opt: original options - * @newtype: option type to replace in @opt - * @newopt: new option of type @newtype to replace (kernel-mem) - * @newoptlen: length of @newopt - * - * See ipv6_renew_options(). The difference is that @newopt is - * kernel memory, rather than user memory. - */ -struct ipv6_txoptions * -ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt, - int newtype, struct ipv6_opt_hdr *newopt, - int newoptlen) -{ - struct ipv6_txoptions *ret_val; - const mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret_val = ipv6_renew_options(sk, opt, newtype, - (struct ipv6_opt_hdr __user *)newopt, - newoptlen); - set_fs(old_fs); - return ret_val; } struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1276d5bd5675..5c91b05c8d8f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -390,6 +390,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + struct ipv6_opt_hdr *new = NULL; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) + break; /* remove any sticky options header with a zero option * length, per RFC3542. @@ -401,17 +407,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) goto e_inval; - - /* hop-by-hop / destination options are privileged option */ - retv = -EPERM; - if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) - break; + else { + new = memdup_user(optval, optlen); + if (IS_ERR(new)) { + retv = PTR_ERR(new); + break; + } + if (unlikely(ipv6_optlen(new) > optlen)) { + kfree(new); + goto e_inval; + } + } opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); - opt = ipv6_renew_options(sk, opt, optname, - (struct ipv6_opt_hdr __user *)optval, - optlen); + opt = ipv6_renew_options(sk, opt, optname, new); + kfree(new); if (IS_ERR(opt)) { retv = PTR_ERR(opt); break; -- GitLab From 5860ae76e018bddbda89b98da4a12e7ae20b38cd Mon Sep 17 00:00:00 2001 From: Arun Kumar Neelakantam Date: Wed, 4 Jul 2018 19:49:32 +0530 Subject: [PATCH 0959/1291] net: qrtr: Broadcast messages only from control port [ Upstream commit fdf5fd3975666804118e62c69de25dc85cc0909c ] The broadcast node id should only be sent with the control port id. Signed-off-by: Arun Kumar Neelakantam Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/qrtr/qrtr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 78418f38464a..084adea6a818 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -710,6 +710,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { enqueue_fn = qrtr_bcast_enqueue; + if (addr->sq_port != QRTR_PORT_CTRL) { + release_sock(sk); + return -ENOTCONN; + } } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { -- GitLab From 533b9559ff83a986236c62cbb1b439b0b52ff963 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 4 Jul 2018 11:12:39 +0300 Subject: [PATCH 0960/1291] sh_eth: fix invalid context bug while calling auto-negotiation by ethtool [ Upstream commit 53a710b5044d8475faa6813000b6dd659400ef7b ] Since commit 35b5f6b1a82b ("PHYLIB: Locking fixes for PHY I/O potentially sleeping") phy_start_aneg() function utilizes a mutex to serialize changes to phy state, however the helper function is called in atomic context. The bug can be reproduced by running "ethtool -r" command, the bug is reported if CONFIG_DEBUG_ATOMIC_SLEEP build option is enabled. Fixes: dc19e4e5e02f ("sh: sh_eth: Add support ethtool") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 38080e95a82d..22adaa169ddd 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2156,18 +2156,10 @@ static void sh_eth_get_regs(struct net_device *ndev, struct ethtool_regs *regs, static int sh_eth_nway_reset(struct net_device *ndev) { - struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned long flags; - int ret; - if (!ndev->phydev) return -ENODEV; - spin_lock_irqsave(&mdp->lock, flags); - ret = phy_start_aneg(ndev->phydev); - spin_unlock_irqrestore(&mdp->lock, flags); - - return ret; + return phy_start_aneg(ndev->phydev); } static u32 sh_eth_get_msglevel(struct net_device *ndev) -- GitLab From 7947dc71b9ded5d01fda11580fe540c7fee45278 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 4 Jul 2018 11:12:40 +0300 Subject: [PATCH 0961/1291] sh_eth: fix invalid context bug while changing link options by ethtool [ Upstream commit 5cb3f52a11e18628fc4bee76dd14b1f0b76349de ] The change fixes sleep in atomic context bug, which is encountered every time when link settings are changed by ethtool. Since commit 35b5f6b1a82b ("PHYLIB: Locking fixes for PHY I/O potentially sleeping") phy_start_aneg() function utilizes a mutex to serialize changes to phy state, however that helper function is called in atomic context under a grabbed spinlock, because phy_start_aneg() is called by phy_ethtool_ksettings_set() and by replaced phy_ethtool_sset() helpers from phylib. Now duplex mode setting is enforced in sh_eth_adjust_link() only, also now RX/TX is disabled when link is put down or modifications to E-MAC registers ECMR and GECMR are expected for both cases of checked and ignored link status pin state from E-MAC interrupt handler. For reference the change is a partial rework of commit 1e1b812bbe10 ("sh_eth: fix handling of no LINK signal"). Fixes: dc19e4e5e02f ("sh: sh_eth: Add support ethtool") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 49 ++++++++------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 22adaa169ddd..abfb9faadbc4 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1821,8 +1821,15 @@ static void sh_eth_adjust_link(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; + unsigned long flags; int new_state = 0; + spin_lock_irqsave(&mdp->lock, flags); + + /* Disable TX and RX right over here, if E-MAC change is ignored */ + if (mdp->cd->no_psr || mdp->no_ether_link) + sh_eth_rcv_snd_disable(ndev); + if (phydev->link) { if (phydev->duplex != mdp->duplex) { new_state = 1; @@ -1841,18 +1848,21 @@ static void sh_eth_adjust_link(struct net_device *ndev) sh_eth_modify(ndev, ECMR, ECMR_TXF, 0); new_state = 1; mdp->link = phydev->link; - if (mdp->cd->no_psr || mdp->no_ether_link) - sh_eth_rcv_snd_enable(ndev); } } else if (mdp->link) { new_state = 1; mdp->link = 0; mdp->speed = 0; mdp->duplex = -1; - if (mdp->cd->no_psr || mdp->no_ether_link) - sh_eth_rcv_snd_disable(ndev); } + /* Enable TX and RX right over here, if E-MAC change is ignored */ + if ((mdp->cd->no_psr || mdp->no_ether_link) && phydev->link) + sh_eth_rcv_snd_enable(ndev); + + mmiowb(); + spin_unlock_irqrestore(&mdp->lock, flags); + if (new_state && netif_msg_link(mdp)) phy_print_status(phydev); } @@ -1933,39 +1943,10 @@ static int sh_eth_get_link_ksettings(struct net_device *ndev, static int sh_eth_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd) { - struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned long flags; - int ret; - if (!ndev->phydev) return -ENODEV; - spin_lock_irqsave(&mdp->lock, flags); - - /* disable tx and rx */ - sh_eth_rcv_snd_disable(ndev); - - ret = phy_ethtool_ksettings_set(ndev->phydev, cmd); - if (ret) - goto error_exit; - - if (cmd->base.duplex == DUPLEX_FULL) - mdp->duplex = 1; - else - mdp->duplex = 0; - - if (mdp->cd->set_duplex) - mdp->cd->set_duplex(ndev); - -error_exit: - mdelay(1); - - /* enable tx and rx */ - sh_eth_rcv_snd_enable(ndev); - - spin_unlock_irqrestore(&mdp->lock, flags); - - return ret; + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } /* If it is ever necessary to increase SH_ETH_REG_DUMP_MAX_REGS, the -- GitLab From 1465aba3428dc13c9f34607e80ec0b0b9f309c95 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 4 Jul 2018 11:14:50 +0300 Subject: [PATCH 0962/1291] ravb: fix invalid context bug while calling auto-negotiation by ethtool [ Upstream commit 0973a4dd79fe56a3beecfcff675ba4c01df0b0c1 ] Since commit 35b5f6b1a82b ("PHYLIB: Locking fixes for PHY I/O potentially sleeping") phy_start_aneg() function utilizes a mutex to serialize changes to phy state, however the helper function is called in atomic context. The bug can be reproduced by running "ethtool -r" command, the bug is reported if CONFIG_DEBUG_ATOMIC_SLEEP build option is enabled. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fdf30bfa403b..4edbd64d21a6 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1131,15 +1131,10 @@ static int ravb_set_link_ksettings(struct net_device *ndev, static int ravb_nway_reset(struct net_device *ndev) { - struct ravb_private *priv = netdev_priv(ndev); int error = -ENODEV; - unsigned long flags; - if (ndev->phydev) { - spin_lock_irqsave(&priv->lock, flags); + if (ndev->phydev) error = phy_start_aneg(ndev->phydev); - spin_unlock_irqrestore(&priv->lock, flags); - } return error; } -- GitLab From 924d13694a13bf1c6be9a4df744ae2fcfcb1fa71 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 4 Jul 2018 11:14:51 +0300 Subject: [PATCH 0963/1291] ravb: fix invalid context bug while changing link options by ethtool [ Upstream commit 05925e52a7d379192a5fdff2c33710f573190ead ] The change fixes sleep in atomic context bug, which is encountered every time when link settings are changed by ethtool. Since commit 35b5f6b1a82b ("PHYLIB: Locking fixes for PHY I/O potentially sleeping") phy_start_aneg() function utilizes a mutex to serialize changes to phy state, however that helper function is called in atomic context under a grabbed spinlock, because phy_start_aneg() is called by phy_ethtool_ksettings_set() and by replaced phy_ethtool_sset() helpers from phylib. Now duplex mode setting is enforced in ravb_adjust_link() only, also now RX/TX is disabled when link is put down or modifications to E-MAC registers ECMR and GECMR are expected for both cases of checked and ignored link status pin state from E-MAC interrupt handler. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 49 ++++++++---------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4edbd64d21a6..e87a779bfcfe 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -959,6 +959,13 @@ static void ravb_adjust_link(struct net_device *ndev) struct ravb_private *priv = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; bool new_state = false; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* Disable TX and RX right over here, if E-MAC change is ignored */ + if (priv->no_avb_link) + ravb_rcv_snd_disable(ndev); if (phydev->link) { if (phydev->duplex != priv->duplex) { @@ -976,18 +983,21 @@ static void ravb_adjust_link(struct net_device *ndev) ravb_modify(ndev, ECMR, ECMR_TXF, 0); new_state = true; priv->link = phydev->link; - if (priv->no_avb_link) - ravb_rcv_snd_enable(ndev); } } else if (priv->link) { new_state = true; priv->link = 0; priv->speed = 0; priv->duplex = -1; - if (priv->no_avb_link) - ravb_rcv_snd_disable(ndev); } + /* Enable TX and RX right over here, if E-MAC change is ignored */ + if (priv->no_avb_link && phydev->link) + ravb_rcv_snd_enable(ndev); + + mmiowb(); + spin_unlock_irqrestore(&priv->lock, flags); + if (new_state && netif_msg_link(priv)) phy_print_status(phydev); } @@ -1094,39 +1104,10 @@ static int ravb_get_link_ksettings(struct net_device *ndev, static int ravb_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd) { - struct ravb_private *priv = netdev_priv(ndev); - unsigned long flags; - int error; - if (!ndev->phydev) return -ENODEV; - spin_lock_irqsave(&priv->lock, flags); - - /* Disable TX and RX */ - ravb_rcv_snd_disable(ndev); - - error = phy_ethtool_ksettings_set(ndev->phydev, cmd); - if (error) - goto error_exit; - - if (cmd->base.duplex == DUPLEX_FULL) - priv->duplex = 1; - else - priv->duplex = 0; - - ravb_set_duplex(ndev); - -error_exit: - mdelay(1); - - /* Enable TX and RX */ - ravb_rcv_snd_enable(ndev); - - mmiowb(); - spin_unlock_irqrestore(&priv->lock, flags); - - return error; + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } static int ravb_nway_reset(struct net_device *ndev) -- GitLab From 431f0995a5e86727f69a7fc53007a00fc5ef088d Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 6 Jul 2018 22:15:00 +0200 Subject: [PATCH 0964/1291] ARM: pxa: irq: fix handling of ICMR registers in suspend/resume [ Upstream commit 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 ] PXA3xx platforms have 56 interrupts that are stored in two ICMR registers. The code in pxa_irq_suspend() and pxa_irq_resume() however does a simple division by 32 which only leads to one register being saved at suspend and restored at resume time. The NAND interrupt setting, for instance, is lost. Fix this by using DIV_ROUND_UP() instead. Signed-off-by: Daniel Mack Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 9c10248fadcc..4e8c2116808e 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -185,7 +185,7 @@ static int pxa_irq_suspend(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); saved_icmr[i] = __raw_readl(base + ICMR); @@ -204,7 +204,7 @@ static void pxa_irq_resume(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); __raw_writel(saved_icmr[i], base + ICMR); -- GitLab From de9f2452b1ad4e223b305618df8fa2b7312d8488 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 6 Jul 2018 21:01:06 +0200 Subject: [PATCH 0965/1291] net/sched: act_tunnel_key: fix NULL dereference when 'goto chain' is used [ Upstream commit 38230a3e0e0933bbcf5df6fa469ba0667f667568 ] the control action in the common member of struct tcf_tunnel_key must be a valid value, as it can contain the chain index when 'goto chain' is used. Ensure that the control action can be read as x->tcfa_action, when x is a pointer to struct tc_action and x->ops->type is TCA_ACT_TUNNEL_KEY, to prevent the following command: # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ > $tcflags dst_mac $h2mac action tunnel_key unset goto chain 1 from causing a NULL dereference when a matching packet is received: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 80000001097ac067 P4D 80000001097ac067 PUD 103b0a067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 3491 Comm: mausezahn Tainted: G E 4.18.0-rc2.auguri+ #421 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40 FS: 00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0 Call Trace: fl_classify+0x1ad/0x1c0 [cls_flower] ? __update_load_avg_se.isra.47+0x1ca/0x1d0 ? __update_load_avg_se.isra.47+0x1ca/0x1d0 ? update_load_avg+0x665/0x690 ? update_load_avg+0x665/0x690 ? kmem_cache_alloc+0x38/0x1c0 tcf_classify+0x89/0x140 __netif_receive_skb_core+0x5ea/0xb70 ? enqueue_entity+0xd0/0x270 ? process_backlog+0x97/0x150 process_backlog+0x97/0x150 net_rx_action+0x14b/0x3e0 __do_softirq+0xde/0x2b4 do_softirq_own_stack+0x2a/0x40 do_softirq.part.18+0x49/0x50 __local_bh_enable_ip+0x49/0x50 __dev_queue_xmit+0x4ab/0x8a0 ? wait_woken+0x80/0x80 ? packet_sendmsg+0x38f/0x810 ? __dev_queue_xmit+0x8a0/0x8a0 packet_sendmsg+0x38f/0x810 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? do_vfs_ioctl+0xa4/0x630 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x22a/0x290 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fd67e18dc93 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24 RSP: 002b:00007ffe0189b748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000020ca010 RCX: 00007fd67e18dc93 RDX: 0000000000000062 RSI: 00000000020ca322 RDI: 0000000000000003 RBP: 00007ffe0189b780 R08: 00007ffe0189b760 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062 R13: 00000000020ca322 R14: 00007ffe0189b760 R15: 0000000000000003 Modules linked in: act_tunnel_key act_gact cls_flower sch_ingress vrf veth act_csum(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic kvm_intel kvm irqbypass snd_hda_intel crct10dif_pclmul crc32_pclmul hp_wmi ghash_clmulni_intel pcbc snd_hda_codec aesni_intel sparse_keymap rfkill snd_hda_core snd_hwdep snd_seq crypto_simd iTCO_wdt gpio_ich iTCO_vendor_support wmi_bmof cryptd mei_wdt glue_helper snd_seq_device snd_pcm pcspkr snd_timer snd i2c_i801 lpc_ich sg soundcore wmi mei_me mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod sr_mod cdrom i915 video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ahci crc32c_intel libahci serio_raw sfc libata mtd drm ixgbe mdio i2c_core e1000e dca CR2: 0000000000000000 ---[ end trace 1ab8b5b5d4639dfc ]--- RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40 FS: 00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x11400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/tc_act/tc_tunnel_key.h | 1 - net/sched/act_tunnel_key.c | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index efef0b4b1b2b..46b8c7f1c8d5 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -18,7 +18,6 @@ struct tcf_tunnel_key_params { struct rcu_head rcu; int tcft_action; - int action; struct metadata_dst *tcft_enc_metadata; }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 7cb63616805d..cd51f2ed55fa 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -36,7 +36,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&t->tcf_tm); bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); - action = params->action; + action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: @@ -182,7 +182,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_old = rtnl_dereference(t->params); - params_new->action = parm->action; + t->tcf_action = parm->action; params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -254,13 +254,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .index = t->tcf_index, .refcnt = t->tcf_refcnt - ref, .bindcnt = t->tcf_bindcnt - bind, + .action = t->tcf_action, }; struct tcf_t tm; params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; - opt.action = params->action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) goto nla_put_failure; -- GitLab From 1b8f1ab088baa4cb29c1373d529955786be8fdc1 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 18 Jun 2018 18:30:43 +0100 Subject: [PATCH 0966/1291] nvmem: Don't let a NULL cell_id for nvmem_cell_get() crash us [ Upstream commit 87ed1405ef09d29a14df43295f7b6a93b63bfe6e ] In commit ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips") you can see a call like: devm_nvmem_cell_get(dev, NULL); Note that the cell ID passed to the function is NULL. This is because the qcom-qusb2 driver is expected to work only on systems where the PHY node is hooked up via device-tree and is nameless. This works OK for the most part. The first thing nvmem_cell_get() does is to call of_nvmem_cell_get() and there it's documented that a NULL name is fine. The problem happens when the call to of_nvmem_cell_get() returns -EINVAL. In such a case we'll fall back to nvmem_cell_get_from_list() and eventually might (if nvmem_cells isn't an empty list) crash with something that looks like: strcmp nvmem_find_cell __nvmem_device_get nvmem_cell_get_from_list nvmem_cell_get devm_nvmem_cell_get qusb2_phy_probe There are several different ways we could fix this problem: One could argue that perhaps the qcom-qusb2 driver should be changed to use of_nvmem_cell_get() which is allowed to have a NULL name. In that case, we'd need to add a patche to introduce devm_of_nvmem_cell_get() since the qcom-qusb2 driver is using devm managed resources. One could also argue that perhaps we could just add a name to qcom-qusb2. That would be OK but I believe it effectively changes the device tree bindings, so maybe it's a no-go. In this patch I have chosen to fix the problem by simply not crashing when a NULL cell_id is passed to nvmem_cell_get(). NOTE: that for the qcom-qusb2 driver the "nvmem-cells" property is defined to be optional and thus it's expected to be a common case that we would hit this crash and this is more than just a theoretical fix. Fixes: ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips") Signed-off-by: Douglas Anderson Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 2afafd5d8915..635886e4835c 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -865,6 +865,10 @@ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id) return cell; } + /* NULL cell_id only allowed for device tree; invalid otherwise */ + if (!cell_id) + return ERR_PTR(-EINVAL); + return nvmem_cell_get_from_list(cell_id); } EXPORT_SYMBOL_GPL(nvmem_cell_get); -- GitLab From 68e70bc28d53a4b09dd7bc1fab2df911dd284bd8 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:53 +0200 Subject: [PATCH 0967/1291] ieee802154: at86rf230: switch from BUG_ON() to WARN_ON() on problem [ Upstream commit 20f330452ad8814f2289a589baf65e21270879a7 ] The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/at86rf230.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 548d9d026a85..ea1704a4b4b9 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -940,7 +940,7 @@ at86rf230_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) static int at86rf230_ed(struct ieee802154_hw *hw, u8 *level) { - BUG_ON(!level); + WARN_ON(!level); *level = 0xbe; return 0; } -- GitLab From c1f84e47ec2621b806e6f268f8f5a889b43b3186 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:54 +0200 Subject: [PATCH 0968/1291] ieee802154: at86rf230: use __func__ macro for debug messages [ Upstream commit 8a81388ec27c4c0adbdecd20e67bb5f411ab46b2 ] Instead of having the function name hard-coded (it might change and we forgot to update them in the debug output) we can use __func__ instead and also shorter the line so we do not need to break it. Also fix an extra blank line while being here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/at86rf230.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index ea1704a4b4b9..5c48bdb6f678 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1121,8 +1121,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_SADDR_CHANGED) { u16 addr = le16_to_cpu(filt->short_addr); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for saddr\n"); + dev_vdbg(&lp->spi->dev, "%s called for saddr\n", __func__); __at86rf230_write(lp, RG_SHORT_ADDR_0, addr); __at86rf230_write(lp, RG_SHORT_ADDR_1, addr >> 8); } @@ -1130,8 +1129,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_PANID_CHANGED) { u16 pan = le16_to_cpu(filt->pan_id); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for pan id\n"); + dev_vdbg(&lp->spi->dev, "%s called for pan id\n", __func__); __at86rf230_write(lp, RG_PAN_ID_0, pan); __at86rf230_write(lp, RG_PAN_ID_1, pan >> 8); } @@ -1140,15 +1138,13 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, u8 i, addr[8]; memcpy(addr, &filt->ieee_addr, 8); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for IEEE addr\n"); + dev_vdbg(&lp->spi->dev, "%s called for IEEE addr\n", __func__); for (i = 0; i < 8; i++) __at86rf230_write(lp, RG_IEEE_ADDR_0 + i, addr[i]); } if (changed & IEEE802154_AFILT_PANC_CHANGED) { - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for panc change\n"); + dev_vdbg(&lp->spi->dev, "%s called for panc change\n", __func__); if (filt->pan_coord) at86rf230_write_subreg(lp, SR_AACK_I_AM_COORD, 1); else @@ -1252,7 +1248,6 @@ at86rf230_set_cca_mode(struct ieee802154_hw *hw, return at86rf230_write_subreg(lp, SR_CCA_MODE, val); } - static int at86rf230_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { -- GitLab From 1b57c5876c08fa8b6a79165ed99ddd1cc1f2438f Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:05 +0200 Subject: [PATCH 0969/1291] ieee802154: fakelb: switch from BUG_ON() to WARN_ON() on problem [ Upstream commit 8f2fbc6c60ff213369e06a73610fc882a42fdf20 ] The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/fakelb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 0d673f7682ee..176395e4b7bb 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -49,7 +49,7 @@ struct fakelb_phy { static int fakelb_hw_ed(struct ieee802154_hw *hw, u8 *level) { - BUG_ON(!level); + WARN_ON(!level); *level = 0xbe; return 0; -- GitLab From d3f205d2d3cdbd57b67a16b0dcd197bcf19e6111 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 6 Jul 2018 21:02:36 +0300 Subject: [PATCH 0970/1291] gpu: host1x: Check whether size of unpin isn't 0 [ Upstream commit ec58923215dbbeef59ee82923ee94d745f73db58 ] Only gather pins are mapped by the Host1x driver, regular BO relocations are not. Check whether size of unpin isn't 0, otherwise IOVA allocation at 0x0 could be erroneously released. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/host1x/job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index db509ab8874e..acd99783bbca 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -686,7 +686,8 @@ void host1x_job_unpin(struct host1x_job *job) for (i = 0; i < job->num_unpins; i++) { struct host1x_job_unpin_data *unpin = &job->unpins[i]; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && + unpin->size && host->domain) { iommu_unmap(host->domain, job->addr_phys[i], unpin->size); free_iova(&host->iova, -- GitLab From f2cbde943d9797f49ff484408e1edb4fd9bccaae Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 20 Jun 2018 16:03:58 +0300 Subject: [PATCH 0971/1291] drm/tegra: Fix comparison operator for buffer size [ Upstream commit 5265f0338bc0feec6c0d544dfe005dec1a93cb93 ] Here we are checking for the buffer length, not an offset for writing to, so using > is correct. The current code incorrectly rejects a command buffer ending at the memory buffer's end. Signed-off-by: Mikko Perttunen Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tegra/drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 0598b4c18c25..75b1c8c03ce9 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -470,7 +470,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, * unaligned offset is malformed and cause commands stream * corruption on the buffer address relocation. */ - if (offset & 3 || offset >= obj->gem.size) { + if (offset & 3 || offset > obj->gem.size) { err = -EINVAL; goto fail; } -- GitLab From 3d211fefdd52a717e91b5f50fa578c810a2c2596 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 24 Jun 2018 14:35:10 +0100 Subject: [PATCH 0972/1291] drm/armada: fix colorkey mode property [ Upstream commit d378859a667edc99e3473704847698cae97ca2b1 ] The colorkey mode property was not correctly disabling the colorkeying when "disabled" mode was selected. Arrange for this to work as one would expect. Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/armada/armada_hw.h | 1 + drivers/gpu/drm/armada/armada_overlay.c | 30 ++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_hw.h b/drivers/gpu/drm/armada/armada_hw.h index 27319a8335e2..345dc4d0851e 100644 --- a/drivers/gpu/drm/armada/armada_hw.h +++ b/drivers/gpu/drm/armada/armada_hw.h @@ -160,6 +160,7 @@ enum { CFG_ALPHAM_GRA = 0x1 << 16, CFG_ALPHAM_CFG = 0x2 << 16, CFG_ALPHA_MASK = 0xff << 8, +#define CFG_ALPHA(x) ((x) << 8) CFG_PIXCMD_MASK = 0xff, }; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index edc44910d79f..2076346b09ee 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -28,6 +28,7 @@ struct armada_ovl_plane_properties { uint16_t contrast; uint16_t saturation; uint32_t colorkey_mode; + uint32_t colorkey_enable; }; struct armada_ovl_plane { @@ -59,11 +60,13 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop, writel_relaxed(0x00002000, dcrtc->base + LCD_SPU_CBSH_HUE); spin_lock_irq(&dcrtc->irq_lock); - armada_updatel(prop->colorkey_mode | CFG_ALPHAM_GRA, - CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK, - dcrtc->base + LCD_SPU_DMA_CTRL1); - - armada_updatel(ADV_GRACOLORKEY, 0, dcrtc->base + LCD_SPU_ADV_REG); + armada_updatel(prop->colorkey_mode, + CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK, + dcrtc->base + LCD_SPU_DMA_CTRL1); + if (dcrtc->variant->has_spu_adv_reg) + armada_updatel(prop->colorkey_enable, + ADV_GRACOLORKEY | ADV_VIDCOLORKEY, + dcrtc->base + LCD_SPU_ADV_REG); spin_unlock_irq(&dcrtc->irq_lock); } @@ -339,8 +342,17 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane, dplane->prop.colorkey_vb |= K2B(val); update_attr = true; } else if (property == priv->colorkey_mode_prop) { - dplane->prop.colorkey_mode &= ~CFG_CKMODE_MASK; - dplane->prop.colorkey_mode |= CFG_CKMODE(val); + if (val == CKMODE_DISABLE) { + dplane->prop.colorkey_mode = + CFG_CKMODE(CKMODE_DISABLE) | + CFG_ALPHAM_CFG | CFG_ALPHA(255); + dplane->prop.colorkey_enable = 0; + } else { + dplane->prop.colorkey_mode = + CFG_CKMODE(val) | + CFG_ALPHAM_GRA | CFG_ALPHA(0); + dplane->prop.colorkey_enable = ADV_GRACOLORKEY; + } update_attr = true; } else if (property == priv->brightness_prop) { dplane->prop.brightness = val - 256; @@ -470,7 +482,9 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) dplane->prop.colorkey_yr = 0xfefefe00; dplane->prop.colorkey_ug = 0x01010100; dplane->prop.colorkey_vb = 0x01010100; - dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB); + dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB) | + CFG_ALPHAM_GRA | CFG_ALPHA(0); + dplane->prop.colorkey_enable = ADV_GRACOLORKEY; dplane->prop.brightness = 0; dplane->prop.contrast = 0x4000; dplane->prop.saturation = 0x4000; -- GitLab From afd13311ee014d52b2d1ca9c452a157b836d987f Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Jun 2018 17:06:06 +0100 Subject: [PATCH 0973/1291] drm/armada: fix irq handling [ Upstream commit 92298c1cd8e8a6b56322b602ad72b54e6237631d ] Add the missing locks to the IRQ enable/disable paths, and fix a comment in the interrupt handler: reading the ISR clears down the status bits, but does not reset the interrupt so it can signal again. That seems to require a write. Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/armada/armada_crtc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 79ce877bf45f..3039936f8f3f 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -483,8 +483,9 @@ static irqreturn_t armada_drm_irq(int irq, void *arg) u32 v, stat = readl_relaxed(dcrtc->base + LCD_SPU_IRQ_ISR); /* - * This is rediculous - rather than writing bits to clear, we - * have to set the actual status register value. This is racy. + * Reading the ISR appears to clear bits provided CLEAN_SPU_IRQ_ISR + * is set. Writing has some other effect to acknowledge the IRQ - + * without this, we only get a single IRQ. */ writel_relaxed(0, dcrtc->base + LCD_SPU_IRQ_ISR); @@ -1104,16 +1105,22 @@ armada_drm_crtc_set_property(struct drm_crtc *crtc, static int armada_drm_crtc_enable_vblank(struct drm_crtc *crtc) { struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + unsigned long flags; + spin_lock_irqsave(&dcrtc->irq_lock, flags); armada_drm_crtc_enable_irq(dcrtc, VSYNC_IRQ_ENA); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); return 0; } static void armada_drm_crtc_disable_vblank(struct drm_crtc *crtc) { struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + unsigned long flags; + spin_lock_irqsave(&dcrtc->irq_lock, flags); armada_drm_crtc_disable_irq(dcrtc, VSYNC_IRQ_ENA); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); } static const struct drm_crtc_funcs armada_crtc_funcs = { @@ -1221,6 +1228,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, CFG_PDWN64x66, dcrtc->base + LCD_SPU_SRAM_PARA1); writel_relaxed(0x2032ff81, dcrtc->base + LCD_SPU_DMA_CTRL1); writel_relaxed(dcrtc->irq_ena, dcrtc->base + LCD_SPU_IRQ_ENA); + readl_relaxed(dcrtc->base + LCD_SPU_IRQ_ISR); writel_relaxed(0, dcrtc->base + LCD_SPU_IRQ_ISR); ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", -- GitLab From a76e62b50dc0fb869a5371448395700eb3d31bd3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 6 Jul 2018 20:06:05 +0200 Subject: [PATCH 0974/1291] netfilter: nft_compat: explicitly reject ERROR and standard target [ Upstream commit 21d5e078192d244df3d6049f9464fff2f72cfd68 ] iptables-nft never requests these, but make this explicitly illegal. If it were quested, kernel could oops as ->eval is NULL, furthermore, the builtin targets have no owning module so its possible to rmmod eb/ip/ip6_tables module even if they would be loaded. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 3bd637eadc42..6da1cec1494a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -825,10 +825,18 @@ nft_target_select_ops(const struct nft_ctx *ctx, rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); family = ctx->afi->family; + if (strcmp(tg_name, XT_ERROR_TARGET) == 0 || + strcmp(tg_name, XT_STANDARD_TARGET) == 0 || + strcmp(tg_name, "standard") == 0) + return ERR_PTR(-EINVAL); + /* Re-use the existing target if it's already loaded. */ list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; + if (!target->target) + continue; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; } @@ -837,6 +845,11 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (IS_ERR(target)) return ERR_PTR(-ENOENT); + if (!target->target) { + err = -EINVAL; + goto err; + } + if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) { err = -EINVAL; goto err; -- GitLab From e653e79ace50feb02030b5d4728e282c839c24fc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 6 Jul 2018 16:38:53 +0300 Subject: [PATCH 0975/1291] netfilter: nf_conntrack: Fix possible possible crash on module loading. [ Upstream commit 2045cdfa1b40d66f126f3fd05604fc7c754f0022 ] Loading the nf_conntrack module with doubled hashsize parameter, i.e. modprobe nf_conntrack hashsize=12345 hashsize=12345 causes NULL-ptr deref. If 'hashsize' specified twice, the nf_conntrack_set_hashsize() function will be called also twice. The first nf_conntrack_set_hashsize() call will set the 'nf_conntrack_htable_size' variable: nf_conntrack_set_hashsize() ... /* On boot, we can set this without any fancy locking. */ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); But on the second invocation, the nf_conntrack_htable_size is already set, so the nf_conntrack_set_hashsize() will take a different path and call the nf_conntrack_hash_resize() function. Which will crash on the attempt to dereference 'nf_conntrack_hash' pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: 0010:nf_conntrack_hash_resize+0x255/0x490 [nf_conntrack] Call Trace: nf_conntrack_set_hashsize+0xcd/0x100 [nf_conntrack] parse_args+0x1f9/0x5a0 load_module+0x1281/0x1a50 __se_sys_finit_module+0xbe/0xf0 do_syscall_64+0x7c/0x390 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix this, by checking !nf_conntrack_hash instead of !nf_conntrack_htable_size. nf_conntrack_hash will be initialized only after the module loaded, so the second invocation of the nf_conntrack_set_hashsize() won't crash, it will just reinitialize nf_conntrack_htable_size again. Signed-off-by: Andrey Ryabinin Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01130392b7c0..a268acc48af0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1949,7 +1949,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) return -EOPNOTSUPP; /* On boot, we can set this without any fancy locking. */ - if (!nf_conntrack_htable_size) + if (!nf_conntrack_hash) return param_set_uint(val, kp); rc = kstrtouint(val, 0, &hashsize); -- GitLab From 09af6c90819d9bdd9f8ce6233308cead86625965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Jun 2018 17:22:05 +0300 Subject: [PATCH 0976/1291] ARC: Improve cmpxchg syscall implementation [ Upstream commit e8708786d4fe21c043d38d760f768949a3d71185 ] This is used in configs lacking hardware atomics to emulate atomic r-m-w for user space, implemented by disabling preemption in kernel. However there are issues in current implementation: 1. Process not terminated if invalid user pointer passed: i.e. __get_user() failed. 2. The reason for this patch was __put_user() failure not being handled either, specifically for the COW break scenario. The zero page is initially wired up and read from __get_user() succeeds. A subsequent write by __put_user() induces a Protection Violation, but COW can't finish as Linux page fault handler is disabled due to preempt disable. And what's worse is we silently return the stale value to user space. Fix this specific case by re-enabling preemption and explicitly fixing up the fault and retrying the whole sequence over. Cc: Max Filippov Cc: linux-arch@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Peter Zijlstra Signed-off-by: Vineet Gupta [vgupta: rewrote the changelog] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/process.c | 47 ++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 5ac3b547453f..4674541eba3f 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -47,7 +47,8 @@ SYSCALL_DEFINE0(arc_gettls) SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) { struct pt_regs *regs = current_pt_regs(); - int uval = -EFAULT; + u32 uval; + int ret; /* * This is only for old cores lacking LLOCK/SCOND, which by defintion @@ -60,23 +61,47 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) /* Z indicates to userspace if operation succeded */ regs->status32 &= ~STATUS_Z_MASK; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; + ret = access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)); + if (!ret) + goto fail; +again: preempt_disable(); - if (__get_user(uval, uaddr)) - goto done; + ret = __get_user(uval, uaddr); + if (ret) + goto fault; - if (uval == expected) { - if (!__put_user(new, uaddr)) - regs->status32 |= STATUS_Z_MASK; - } + if (uval != expected) + goto out; -done: - preempt_enable(); + ret = __put_user(new, uaddr); + if (ret) + goto fault; + + regs->status32 |= STATUS_Z_MASK; +out: + preempt_enable(); return uval; + +fault: + preempt_enable(); + + if (unlikely(ret != -EFAULT)) + goto fail; + + down_read(¤t->mm->mmap_sem); + ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + FAULT_FLAG_WRITE, NULL); + up_read(¤t->mm->mmap_sem); + + if (likely(!ret)) + goto again; + +fail: + force_sig(SIGSEGV, current); + return ret; } #ifdef CONFIG_ISA_ARCV2 -- GitLab From 822bbc4c7966392501d43f9f205a6af09e492567 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 9 Jul 2018 02:24:48 -0400 Subject: [PATCH 0977/1291] bnxt_en: Fix inconsistent BNXT_FLAG_AGG_RINGS logic. [ Upstream commit 07f4fde53d12eb8d921b465bb298e964e0bdc38c ] If there aren't enough RX rings available, the driver will attempt to use a single RX ring without the aggregation ring. If that also fails, the BNXT_FLAG_AGG_RINGS flag is cleared but the other ring parameters are not set consistently to reflect that. If more RX rings become available at the next open, the RX rings will be in an inconsistent state and may crash when freeing the RX rings. Fix it by restoring the BNXT_FLAG_AGG_RINGS if not enough RX rings are available to run without aggregation rings. Fixes: bdbd1eb59c56 ("bnxt_en: Handle no aggregation ring gracefully.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 94931318587c..0340b2f4df41 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7884,8 +7884,11 @@ static int bnxt_get_dflt_rings(struct bnxt *bp, int *max_rx, int *max_tx, /* Not enough rings, try disabling agg rings. */ bp->flags &= ~BNXT_FLAG_AGG_RINGS; rc = bnxt_get_max_rings(bp, max_rx, max_tx, shared); - if (rc) + if (rc) { + /* set BNXT_FLAG_AGG_RINGS back for consistency */ + bp->flags |= BNXT_FLAG_AGG_RINGS; return rc; + } bp->flags |= BNXT_FLAG_NO_AGG_RINGS; bp->dev->hw_features &= ~NETIF_F_LRO; bp->dev->features &= ~NETIF_F_LRO; -- GitLab From cab718feb18f6e4900f8951e966804d988d39a2d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 9 Jul 2018 02:24:49 -0400 Subject: [PATCH 0978/1291] bnxt_en: Always set output parameters in bnxt_get_max_rings(). [ Upstream commit 78f058a4aa0f2280dc4d45d2c4a95728398ef857 ] The current code returns -ENOMEM and does not bother to set the output parameters to 0 when no rings are available. Some callers, such as bnxt_get_channels() will display garbage ring numbers when that happens. Fix it by always setting the output parameters. Fixes: 6e6c5a57fbe1 ("bnxt_en: Modify bnxt_get_max_rings() to support shared or non shared rings.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0340b2f4df41..4be44396e449 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7866,11 +7866,11 @@ int bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, bool shared) int rx, tx, cp; _bnxt_get_max_rings(bp, &rx, &tx, &cp); + *max_rx = rx; + *max_tx = tx; if (!rx || !tx || !cp) return -ENOMEM; - *max_rx = rx; - *max_tx = tx; return bnxt_trim_rings(bp, max_rx, max_tx, cp, shared); } -- GitLab From fdf578210a0eb0b600c68795377cbf4999a24a25 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 9 Jul 2018 02:24:52 -0400 Subject: [PATCH 0979/1291] bnxt_en: Fix for system hang if request_irq fails [ Upstream commit c58387ab1614f6d7fb9e244f214b61e7631421fc ] Fix bug in the error code path when bnxt_request_irq() returns failure. bnxt_disable_napi() should not be called in this error path because NAPI has not been enabled yet. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4be44396e449..937db8019289 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6348,7 +6348,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) rc = bnxt_request_irq(bp); if (rc) { netdev_err(bp->dev, "bnxt_request_irq err: %x\n", rc); - goto open_err; + goto open_err_irq; } } @@ -6386,6 +6386,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) open_err: bnxt_disable_napi(bp); + +open_err_irq: bnxt_del_napi(bp); open_err_free_mem: -- GitLab From a38adc3e104b96f49ff9d9a583e8a1b1256dfff3 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 5 Jul 2018 07:01:33 -0700 Subject: [PATCH 0980/1291] scsi: qedf: Send the driver state to MFW [ Upstream commit 6ac174756dfc9884f08b23af840ca911155f5578 ] Need to notify firmware when driver is loaded and unloaded. Signed-off-by: Saurav Kashyap Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qedf/qedf_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 382edb79a0de..56bcdd412d26 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3240,6 +3240,11 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) init_completion(&qedf->flogi_compl); + status = qed_ops->common->update_drv_state(qedf->cdev, true); + if (status) + QEDF_ERR(&(qedf->dbg_ctx), + "Failed to send drv state to MFW.\n"); + memset(&link_params, 0, sizeof(struct qed_link_params)); link_params.link_up = true; status = qed_ops->common->set_link(qedf->cdev, &link_params); @@ -3288,6 +3293,7 @@ static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void __qedf_remove(struct pci_dev *pdev, int mode) { struct qedf_ctx *qedf; + int rc; if (!pdev) { QEDF_ERR(NULL, "pdev is NULL.\n"); @@ -3382,6 +3388,12 @@ static void __qedf_remove(struct pci_dev *pdev, int mode) qed_ops->common->set_power_state(qedf->cdev, PCI_D0); pci_set_drvdata(pdev, NULL); } + + rc = qed_ops->common->update_drv_state(qedf->cdev, false); + if (rc) + QEDF_ERR(&(qedf->dbg_ctx), + "Failed to send drv state to MFW.\n"); + qed_ops->common->slowpath_stop(qedf->cdev); qed_ops->common->remove(qedf->cdev); -- GitLab From e238b259b7741164e75dc79dda02c790655c0ce1 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Mon, 2 Jul 2018 23:52:31 -0700 Subject: [PATCH 0981/1291] scsi: qedi: Send driver state to MFW [ Upstream commit a3440d0d2f57f7ba102fc332086961cf261180af ] In case of iSCSI offload BFS environment, MFW requires to mark virtual link based upon qedi load status. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qedi/qedi_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 1573749fe615..e7daadc089fc 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2087,6 +2087,7 @@ static int qedi_setup_boot_info(struct qedi_ctx *qedi) static void __qedi_remove(struct pci_dev *pdev, int mode) { struct qedi_ctx *qedi = pci_get_drvdata(pdev); + int rval; if (qedi->tmf_thread) { flush_workqueue(qedi->tmf_thread); @@ -2116,6 +2117,10 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) if (mode == QEDI_MODE_NORMAL) qedi_free_iscsi_pf_param(qedi); + rval = qedi_ops->common->update_drv_state(qedi->cdev, false); + if (rval) + QEDI_ERR(&qedi->dbg_ctx, "Failed to send drv state to MFW\n"); + if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) { qedi_ops->common->slowpath_stop(qedi->cdev); qedi_ops->common->remove(qedi->cdev); @@ -2390,6 +2395,12 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) if (qedi_setup_boot_info(qedi)) QEDI_ERR(&qedi->dbg_ctx, "No iSCSI boot target configured\n"); + + rc = qedi_ops->common->update_drv_state(qedi->cdev, true); + if (rc) + QEDI_ERR(&qedi->dbg_ctx, + "Failed to send drv state to MFW\n"); + } return 0; -- GitLab From 42a061a16675f6cf439228e6582d7596c19f7e8d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:52 -0500 Subject: [PATCH 0982/1291] perf llvm-utils: Remove bashism from kernel include fetch script [ Upstream commit f6432b9f65001651412dbc3589d251534822d4ab ] Like system(), popen() calls /bin/sh, which may/may not be bash. Script when run on dash and encounters the line, yields: exit: Illegal number: -1 checkbashisms report on script content: possible bashism (exit|return with negative status code): exit -1 Remove the bashism and use the more portable non-zero failure status code 1. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124652.8d0af7e2281fd3fd8262cacc@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/llvm-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4952b429caa7..2bdaac048a0a 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -265,16 +265,16 @@ static const char *kinc_fetch_script = "#!/usr/bin/env sh\n" "if ! test -d \"$KBUILD_DIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "TMPDIR=`mktemp -d`\n" "if test -z \"$TMPDIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "cat << EOF > $TMPDIR/Makefile\n" "obj-y := dummy.o\n" -- GitLab From d1d2e7d014d261d435baf0ca12ad0b132cea9188 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Jul 2018 15:42:01 +0200 Subject: [PATCH 0983/1291] perf tools: Fix compilation errors on gcc8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a09603f851045b031e990d2d663958ccb49db525 ] We are getting following warnings on gcc8 that break compilation: $ make CC jvmti/jvmti_agent.o jvmti/jvmti_agent.c: In function ‘jvmti_open’: jvmti/jvmti_agent.c:252:35: error: ‘/jit-’ directive output may be truncated \ writing 5 bytes into a region of size between 1 and 4096 [-Werror=format-truncation=] snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); There's no point in checking the result of snprintf call in jvmti_open, the following open call will fail in case the name is mangled or too long. Using tools/lib/ function scnprintf that touches the return value from the snprintf() calls and thus get rid of those warnings. $ make DEBUG=1 CC arch/x86/util/perf_regs.o arch/x86/util/perf_regs.c: In function ‘arch_sdt_arg_parse_op’: arch/x86/util/perf_regs.c:229:4: error: ‘strncpy’ output truncated before terminating nul copying 2 bytes from a string of the same length [-Werror=stringop-truncation] strncpy(prefix, "+0", 2); ^~~~~~~~~~~~~~~~~~~~~~~~ Using scnprintf instead of the strncpy (which we know is safe in here) to get rid of that warning. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180702134202.17745-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/x86/util/perf_regs.c | 2 +- tools/perf/jvmti/jvmti_agent.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 4b2caf6d48e7..fead6b3b4206 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) else if (rm[2].rm_so != rm[2].rm_eo) prefix[0] = '+'; else - strncpy(prefix, "+0", 2); + scnprintf(prefix, sizeof(prefix), "+0"); } /* Rename register */ diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index cf36de7ea255..c1d20d951434 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -35,6 +35,7 @@ #include #include /* for gettid() */ #include +#include #include "jvmti_agent.h" #include "../util/jitdump.h" @@ -249,7 +250,7 @@ void *jvmti_open(void) /* * jitdump file name */ - snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) -- GitLab From 953c9cddc99983d4bd78936a42c28d5330ccae61 Mon Sep 17 00:00:00 2001 From: Janne Huttunen Date: Mon, 9 Jul 2018 13:59:50 +0300 Subject: [PATCH 0984/1291] perf script python: Fix dict reference counting [ Upstream commit db0ba84c04ef2cf293aaada5ae97531127844d9d ] The dictionaries are attached to the parameter tuple that steals the references and takes care of releasing them when appropriate. The code should not decrement the reference counts explicitly. E.g. if libpython has been built with reference debugging enabled, the superfluous DECREFs will trigger this error when running perf script: Fatal Python error: Objects/tupleobject.c:238 object at 0x7f10f2041b40 has negative ref count -1 Aborted (core dumped) If the reference debugging is not enabled, the superfluous DECREFs might cause the dict objects to be silently released while they are still in use. This may trigger various other assertions or just cause perf crashes and/or weird and unexpected data changes in the stored Python objects. Signed-off-by: Janne Huttunen Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jaroslav Skarvada Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1531133990-17485-1-git-send-email-janne.huttunen@nokia.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/scripting-engines/trace-event-python.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..f03fa7a835a1 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -643,14 +643,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); - if (!dict) { + if (!dict) call_object(handler, t, handler_name); - } else { + else call_object(handler, t, default_handler_name); - Py_DECREF(dict); - } - Py_XDECREF(all_entries_dict); Py_DECREF(t); } @@ -970,7 +967,6 @@ static void python_process_general_event(struct perf_sample *sample, call_object(handler, t, handler_name); - Py_DECREF(dict); Py_DECREF(t); } -- GitLab From a6629efeef9d9bbb76d9e7cdf0262ad07e9f1651 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Jul 2018 10:10:11 -0700 Subject: [PATCH 0985/1291] nfit: fix unchecked dereference in acpi_nfit_ctl [ Upstream commit ee6581ceba7f8314b81b2f2a81f1cf3f67c679e2 ] Incremental patch to fix the unchecked dereference in acpi_nfit_ctl. Reported by Dan Carpenter: "acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value" from Jun 28, 2018, leads to the following Smatch complaint: drivers/acpi/nfit/core.c:578 acpi_nfit_ctl() warn: variable dereferenced before check 'cmd_rc' (see line 411) drivers/acpi/nfit/core.c 410 411 *cmd_rc = -EINVAL; ^^^^^^^^^^^^^^^^^^ Patch adds unchecked dereference. Fixes: c1985cefd844 ("acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value") Signed-off-by: Dave Jiang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 56f242f34fda..8260b90eb64b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -224,7 +224,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, const guid_t *guid; int rc, i; - *cmd_rc = -EINVAL; + if (cmd_rc) + *cmd_rc = -EINVAL; func = cmd; if (cmd == ND_CMD_CALL) { call_pkg = buf; @@ -315,7 +316,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, * If we return an error (like elsewhere) then caller wouldn't * be able to rely upon data returned to make calculation. */ - *cmd_rc = 0; + if (cmd_rc) + *cmd_rc = 0; return 0; } -- GitLab From 26c7588c28caafda58ed88b45912dafc240c495e Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 10 Jul 2018 11:56:50 +0300 Subject: [PATCH 0986/1291] RDMA/mlx5: Fix memory leak in mlx5_ib_create_srq() error path [ Upstream commit d63c46734c545ad0488761059004a65c46efdde3 ] Fix memory leak in the error path of mlx5_ib_create_srq() by making sure to free the allocated srq. Fixes: c2b37f76485f ("IB/mlx5: Fix integer overflows in mlx5_ib_create_srq") Signed-off-by: Kamal Heib Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/srq.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3c7522d025f2..93d67d97c279 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -266,18 +266,24 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) - return ERR_PTR(-EINVAL); + if (desc_size == 0 || srq->msrq.max_gs > desc_size) { + err = -EINVAL; + goto err_srq; + } desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) - return ERR_PTR(-EINVAL); + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { + err = -EINVAL; + goto err_srq; + } srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) - return ERR_PTR(-EINVAL); + if (buf_size < desc_size) { + err = -EINVAL; + goto err_srq; + } in.type = init_attr->srq_type; if (pd->uobject) -- GitLab From b900c624d467abc34c7dfbf873493cd4e9ab188f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 10 Jul 2018 08:22:40 +0100 Subject: [PATCH 0987/1291] ARM: 8780/1: ftrace: Only set kernel memory back to read-only after boot [ Upstream commit b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 ] Dynamic ftrace requires modifying the code segments that are usually set to read-only. To do this, a per arch function is called both before and after the ftrace modifications are performed. The "before" function will set kernel code text to read-write to allow for ftrace to make the modifications, and the "after" function will set the kernel code text back to "read-only" to keep the kernel code text protected. The issue happens when dynamic ftrace is tested at boot up. The test is done before the kernel code text has been set to read-only. But the "before" and "after" calls are still performed. The "after" call will change the kernel code text to read-only prematurely, and other boot code that expects this code to be read-write will fail. The solution is to add a variable that is set when the kernel code text is expected to be converted to read-only, and make the ftrace "before" and "after" calls do nothing if that variable is not yet set. This is similar to the x86 solution from commit 162396309745 ("ftrace, x86: make kernel text writable only for conversions"). Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home Reported-by: Stefan Agner Tested-by: Stefan Agner Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/init.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 0f6d1537f330..defb7fc26428 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -745,19 +745,28 @@ static int __mark_rodata_ro(void *unused) return 0; } +static int kernel_set_to_readonly __read_mostly; + void mark_rodata_ro(void) { + kernel_set_to_readonly = 1; stop_machine(__mark_rodata_ro, NULL, NULL); } void set_kernel_text_rw(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, current->active_mm); } void set_kernel_text_ro(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, current->active_mm); } -- GitLab From e77c08b7c6f0a93ac736425d3728038300a009cd Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 10 Jul 2018 14:47:25 -0500 Subject: [PATCH 0988/1291] ARM: DRA7/OMAP5: Enable ACTLR[0] (Enable invalidates of BTB) for secondary cores [ Upstream commit 2f8b5b21830aea95989a6e67d8a971297272a086 ] Call secure services to enable ACTLR[0] (Enable invalidates of BTB with ICIALLU) when branch hardening is enabled for kernel. On GP devices OMAP5/DRA7, there is no possibility to update secure side since "secure world" is ROM and there are no override mechanisms possible. On HS devices, appropriate PPA should do the workarounds as well. However, the configuration is only done for secondary core, since it is expected that firmware/bootloader will have enabled the required configuration for the primary boot core (note: bootloaders typically will NOT enable secondary processors, since it has no need to do so). Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap-smp.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 69df3620eca5..1c73694c871a 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -109,6 +109,45 @@ void omap5_erratum_workaround_801819(void) static inline void omap5_erratum_workaround_801819(void) { } #endif +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +/* + * Configure ACR and enable ACTLR[0] (Enable invalidates of BTB with + * ICIALLU) to activate the workaround for secondary Core. + * NOTE: it is assumed that the primary core's configuration is done + * by the boot loader (kernel will detect a misconfiguration and complain + * if this is not done). + * + * In General Purpose(GP) devices, ACR bit settings can only be done + * by ROM code in "secure world" using the smc call and there is no + * option to update the "firmware" on such devices. This also works for + * High security(HS) devices, as a backup option in case the + * "update" is not done in the "security firmware". + */ +static void omap5_secondary_harden_predictor(void) +{ + u32 acr, acr_mask; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + + /* + * ACTLR[0] (Enable invalidates of BTB with ICIALLU) + */ + acr_mask = BIT(0); + + /* Do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM ACR setup for CVE_2017_5715 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_secondary_harden_predictor(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -131,6 +170,8 @@ static void omap4_secondary_init(unsigned int cpu) set_cntfreq(); /* Configure ACR to disable streaming WA for 801819 */ omap5_erratum_workaround_801819(); + /* Enable ACR to allow for ICUALLU workaround */ + omap5_secondary_harden_predictor(); } /* -- GitLab From 95b08af4026956f8324aec9c883ec084b7c58101 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 11 Jul 2018 12:54:54 -0500 Subject: [PATCH 0989/1291] ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG controller [ Upstream commit 923847413f7316b5ced3491769b3fefa6c56a79a ] The AM3517 has a different OTG controller location than the OMAP3, which is included from omap3.dtsi. This results in a hwmod error. Since the AM3517 has a different OTG controller address, this patch disabes one that is isn't available. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am3517.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index 00da3f2c4072..4b57094a0356 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -87,6 +87,11 @@ hecc: can@5c050000 { }; }; +/* Table Table 5-79 of the TRM shows 480ab000 is reserved */ +&usb_otg_hs { + status = "disabled"; +}; + &iva { status = "disabled"; }; -- GitLab From bd43d233acaa0a80dbc2ae5e605374719eb4bdf7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 18 Jun 2018 12:02:00 -0400 Subject: [PATCH 0990/1291] ixgbe: Be more careful when modifying MAC filters [ Upstream commit d14c780c11fbc10f66c43e7b64eefe87ca442bd3 ] This change makes it so that we are much more explicit about the ordering of updates to the receive address register (RAR) table. Prior to this patch I believe we may have been updating the table while entries were still active, or possibly allowing for reordering of things since we weren't explicitly flushing writes to either the lower or upper portion of the register prior to accessing the other half. Signed-off-by: Alexander Duyck Reviewed-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 64429a14c630..815284fe9324 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1895,7 +1895,12 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, if (enable_addr != 0) rar_high |= IXGBE_RAH_AV; + /* Record lower 32 bits of MAC address and then make + * sure that write is flushed to hardware before writing + * the upper 16 bits and setting the valid bit. + */ IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low); + IXGBE_WRITE_FLUSH(hw); IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); return 0; @@ -1927,8 +1932,13 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); - IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); + /* Clear the address valid bit and upper 16 bits of the address + * before clearing the lower bits. This way we aren't updating + * a live filter. + */ IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); /* clear VMDq pool/queue selection for this RAR */ hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL); -- GitLab From abf9fb6e1537f324d9de965a9928e12dabbc5f47 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 9 Jul 2018 17:45:57 -0700 Subject: [PATCH 0991/1291] tools: build: Use HOSTLDFLAGS with fixdep [ Upstream commit 8b247a92ebd0cda7dec49a6f771d9c4950f3d3ad ] The final link of fixdep uses LDFLAGS but not the existing HOSTLDFLAGS. Fix this. Signed-off-by: Laura Abbott Acked-by: Jiri Olsa Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/build/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index 5eb4b5ad79cb..5edf65e684ab 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -43,7 +43,7 @@ $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o - $(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $< + $(QUIET_LINK)$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< FORCE: -- GitLab From edf81993dc293d9048fce2076a0b9182cd6ac07e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Jul 2018 19:38:36 +0900 Subject: [PATCH 0992/1291] kbuild: suppress warnings from 'getconf LFS_*' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d79a7b424a5630a6fcab31fd7c38af4ea9c9a0f ] Suppress warnings for systems that do not recognize LFS_*. getconf: no such configuration parameter `LFS_CFLAGS' getconf: no such configuration parameter `LFS_LDFLAGS' getconf: no such configuration parameter `LFS_LIBS' Fixes: d7f14c66c273 ("kbuild: Enable Large File Support for hostprogs") Reported-by: Chen Feng Signed-off-by: Masahiro Yamada Acked-by: Uwe Kleine-König Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index e69d0d091742..a495efcf3e9b 100644 --- a/Makefile +++ b/Makefile @@ -357,9 +357,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) -HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS) -HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS) -HOST_LFS_LIBS := $(shell getconf LFS_LIBS) +HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null) +HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null) +HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null) HOSTCC = gcc HOSTCXX = g++ -- GitLab From 75425326b88ccdbcdecc652b1f6f672c0bd286d0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Jul 2018 12:00:45 -0400 Subject: [PATCH 0993/1291] packet: reset network header if packet shorter than ll reserved space [ Upstream commit 993675a3100b16a4c80dfd70cbcde8ea7127b31d ] If variable length link layer headers result in a packet shorter than dev->hard_header_len, reset the network header offset. Else skb->mac_len may exceed skb->len after skb_mac_reset_len. packet_sendmsg_spkt already has similar logic. Fixes: b84bbaf7a6c8 ("packet: in packet_snd start writing at link layer allocation") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 27dafe36f29c..409b288c13d1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2919,6 +2919,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); + if (len < reserve) + skb_reset_network_header(skb); } /* Returns -EFAULT on error */ -- GitLab From bf106eaa181c64373442c75f0dbd602b77cf5233 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jul 2018 15:23:45 +0300 Subject: [PATCH 0994/1291] qlogic: check kstrtoul() for errors [ Upstream commit 5fc853cc01c68f84984ecc2d5fd777ecad78240f ] We accidentally left out the error handling for kstrtoul(). Fixes: a520030e326a ("qlcnic: Implement flash sysfs callback for 83xx adapter") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 287d89dd086f..9c94240bb05a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -1128,6 +1128,8 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp, struct qlcnic_adapter *adapter = dev_get_drvdata(dev); ret = kstrtoul(buf, 16, &data); + if (ret) + return ret; switch (data) { case QLC_83XX_FLASH_SECTOR_ERASE_CMD: -- GitLab From 2d2eacd650c6a01ef90c34c7d99f82854eb7fd96 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jul 2018 06:04:53 -0700 Subject: [PATCH 0995/1291] tcp: remove DELAYED ACK events in DCTCP [ Upstream commit a69258f7aa2623e0930212f09c586fd06674ad79 ] After fixing the way DCTCP tracking delayed ACKs, the delayed-ACK related callbacks are no longer needed Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 -- net/ipv4/tcp_dctcp.c | 25 ------------------------- net/ipv4/tcp_output.c | 4 ---- 3 files changed, 31 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 686e33ea76e7..eca8d65cad1e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -938,8 +938,6 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ - CA_EVENT_DELAYED_ACK, /* Delayed ack is sent */ - CA_EVENT_NON_DELAYED_ACK, }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 1a9b88c8cf72..8b637f9f23a2 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -55,7 +55,6 @@ struct dctcp { u32 dctcp_alpha; u32 next_seq; u32 ce_state; - u32 delayed_ack_reserved; u32 loss_cwnd; }; @@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); - ca->delayed_ack_reserved = 0; ca->loss_cwnd = 0; ca->ce_state = 0; @@ -230,25 +228,6 @@ static void dctcp_state(struct sock *sk, u8 new_state) } } -static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev) -{ - struct dctcp *ca = inet_csk_ca(sk); - - switch (ev) { - case CA_EVENT_DELAYED_ACK: - if (!ca->delayed_ack_reserved) - ca->delayed_ack_reserved = 1; - break; - case CA_EVENT_NON_DELAYED_ACK: - if (ca->delayed_ack_reserved) - ca->delayed_ack_reserved = 0; - break; - default: - /* Don't care for the rest. */ - break; - } -} - static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { switch (ev) { @@ -258,10 +237,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ce_state_1_to_0(sk); break; - case CA_EVENT_DELAYED_ACK: - case CA_EVENT_NON_DELAYED_ACK: - dctcp_update_ack_reserved(sk, ev); - break; default: /* Don't care for the rest. */ break; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3d8f6f342cb1..b2ead31afcba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3513,8 +3513,6 @@ void tcp_send_delayed_ack(struct sock *sk) int ato = icsk->icsk_ack.ato; unsigned long timeout; - tcp_ca_event(sk, CA_EVENT_DELAYED_ACK); - if (ato > TCP_DELACK_MIN) { const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ / 2; @@ -3571,8 +3569,6 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) if (sk->sk_state == TCP_CLOSE) return; - tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK); - /* We are not putting this on the write queue, so * tcp_transmit_skb() will set the ownership to this * sock. -- GitLab From 87011fb97fe5a4b05c7c7c532502b45947fc4709 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 27 Jun 2018 13:49:02 +0200 Subject: [PATCH 0996/1291] pinctrl: ingenic: Fix inverted direction for < JZ4770 [ Upstream commit 0084a786ca8c84b443f67c4a697b4f2552761650 ] The .gpio_set_direction() callback was setting inverted direction for SoCs older than the JZ4770, this restores the correct behaviour. Signed-off-by: Paul Cercueil Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-ingenic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index d84761822243..103aaab41357 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -536,7 +536,7 @@ static int ingenic_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PAT1, input); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, false); - ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DIR, input); + ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DIR, !input); ingenic_config_pin(jzpc, pin, JZ4740_GPIO_FUNC, false); } -- GitLab From 54046f1793c7a35e7b8c63964c181faf384bcc38 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Jul 2018 15:04:25 +0300 Subject: [PATCH 0997/1291] pinctrl: nsp: off by ones in nsp_pinmux_enable() [ Upstream commit f90a21c898db58eaea14b8ad7e9af3b9e15e5f8a ] The > comparisons should be >= or else we read beyond the end of the pinctrl->functions[] array. Fixes: cc4fa83f66e9 ("pinctrl: nsp: add pinmux driver support for Broadcom NSP SoC") Signed-off-by: Dan Carpenter Reviewed-by: Ray Jui Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/bcm/pinctrl-nsp-mux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c index 35c17653c694..5cd8166fbbc8 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c @@ -460,8 +460,8 @@ static int nsp_pinmux_enable(struct pinctrl_dev *pctrl_dev, const struct nsp_pin_function *func; const struct nsp_pin_group *grp; - if (grp_select > pinctrl->num_groups || - func_select > pinctrl->num_functions) + if (grp_select >= pinctrl->num_groups || + func_select >= pinctrl->num_functions) return -EINVAL; func = &pinctrl->functions[func_select]; -- GitLab From b6fadd93ad962be7584b9df8b56897293c1d975f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Jul 2018 12:34:21 +0000 Subject: [PATCH 0998/1291] pinctrl: nsp: Fix potential NULL dereference [ Upstream commit c29e9da56bebb4c2c794e871b0dc0298bbf08142 ] platform_get_resource() may fail and return NULL, so we should better check it's return value to avoid a NULL pointer dereference a bit later in the code. This is detected by Coccinelle semantic patch. @@ expression pdev, res, n, t, e, e1, e2; @@ res = platform_get_resource(pdev, t, n); + if (!res) + return -EINVAL; ... when != res == NULL e = devm_ioremap_nocache(e1, res->start, e2); Fixes: cc4fa83f66e9 ("pinctrl: nsp: add pinmux driver support for Broadcom NSP SoC") Signed-off-by: Wei Yongjun Reviewed-by: Ray Jui Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/bcm/pinctrl-nsp-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c index 5cd8166fbbc8..87618a4e90e4 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c @@ -577,6 +577,8 @@ static int nsp_pinmux_probe(struct platform_device *pdev) return PTR_ERR(pinctrl->base0); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -EINVAL; pinctrl->base1 = devm_ioremap_nocache(&pdev->dev, res->start, resource_size(res)); if (!pinctrl->base1) { -- GitLab From dcedabcab0457f17df9c7ff7d66cd27aa517b685 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Jul 2018 15:30:56 +0300 Subject: [PATCH 0999/1291] drm/nouveau/gem: off by one bugs in nouveau_gem_pushbuf_reloc_apply() [ Upstream commit 7f073d011f93e92d4d225526b9ab6b8b0bbd6613 ] The bo array has req->nr_buffers elements so the > should be >= so we don't read beyond the end of the array. Fixes: a1606a9596e5 ("drm/nouveau: new gem pushbuf interface, bump to 0.0.16") Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2170534101ca..60ffb70bb908 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -599,7 +599,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, struct nouveau_bo *nvbo; uint32_t data; - if (unlikely(r->bo_index > req->nr_buffers)) { + if (unlikely(r->bo_index >= req->nr_buffers)) { NV_PRINTK(err, cli, "reloc bo index invalid\n"); ret = -EINVAL; break; @@ -609,7 +609,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, if (b->presumed.valid) continue; - if (unlikely(r->reloc_bo_index > req->nr_buffers)) { + if (unlikely(r->reloc_bo_index >= req->nr_buffers)) { NV_PRINTK(err, cli, "reloc container bo index invalid\n"); ret = -EINVAL; break; -- GitLab From 7b4ab0e6510bdef33cbb9e3b4a5471d252d3d295 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 Jul 2018 10:38:38 -0700 Subject: [PATCH 1000/1291] hv/netvsc: fix handling of fallback to single queue mode [ Upstream commit 916c5e1413be058d1c1f6e502db350df890730ce ] The netvsc device may need to fallback to running in single queue mode if host side only wants to support single queue. Recent change for handling mtu broke this in setup logic. Reported-by: Dan Carpenter Fixes: 3ffe64f1a641 ("hv_netvsc: split sub-channel setup into async and sync") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/rndis_filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index cb03a6ea076a..328c37e9096d 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1299,6 +1299,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, /* setting up multiple channels failed */ net_device->max_chn = 1; net_device->num_chn = 1; + return 0; err_dev_remv: rndis_filter_device_remove(dev, net_device); -- GitLab From a6c35a5c87509ebaf6cdbc5b035a9184d690b1ba Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Jul 2018 21:25:19 -0700 Subject: [PATCH 1001/1291] net/ethernet/freescale/fman: fix cross-build error [ Upstream commit c133459765fae249ba482f62e12f987aec4376f0 ] CC [M] drivers/net/ethernet/freescale/fman/fman.o In file included from ../drivers/net/ethernet/freescale/fman/fman.c:35: ../include/linux/fsl/guts.h: In function 'guts_set_dmacr': ../include/linux/fsl/guts.h:165:2: error: implicit declaration of function 'clrsetbits_be32' [-Werror=implicit-function-declaration] clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift); ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Madalin Bucur Cc: netdev@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/guts.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h index 3efa3b861d44..941b11811f85 100644 --- a/include/linux/fsl/guts.h +++ b/include/linux/fsl/guts.h @@ -16,6 +16,7 @@ #define __FSL_GUTS_H__ #include +#include /** * Global Utility Registers. -- GitLab From 2daadcdc58278f0d57e4ed23e6841e3a0e059ebf Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 16 Jul 2018 10:29:30 -0500 Subject: [PATCH 1002/1291] ibmvnic: Fix error recovery on login failure [ Upstream commit 3578a7ecb69920efc3885dbd610e98c00dbdf5db ] Testing has uncovered a failure case that is not handled properly. In the event that a login fails and we are not able to recover on the spot, we return 0 from do_reset, preventing any error recovery code from being triggered. Additionally, the state is set to "probed" meaning that when we are able to trigger the error recovery, the driver always comes up in the probed state. To handle the case properly, we need to return a failure code here and set the adapter state to the state that we entered the reset in indicating the state that we would like to come out of the recovery reset in. Signed-off-by: John Allen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 98493be7b4af..046af22a37cb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1463,8 +1463,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = ibmvnic_login(netdev); if (rc) { - adapter->state = VNIC_PROBED; - return 0; + adapter->state = reset_state; + return rc; } rc = reset_tx_pools(adapter); -- GitLab From fc9caa36d80e7f3aaf0720df58a7611ae88b9a94 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 11 Jul 2018 13:41:21 +0800 Subject: [PATCH 1003/1291] btrfs: scrub: Don't use inode page cache in scrub_handle_errored_block() [ Upstream commit 665d4953cde6d9e75c62a07ec8f4f8fd7d396ade ] In commit ac0b4145d662 ("btrfs: scrub: Don't use inode pages for device replace") we removed the branch of copy_nocow_pages() to avoid corruption for compressed nodatasum extents. However above commit only solves the problem in scrub_extent(), if during scrub_pages() we failed to read some pages, sctx->no_io_error_seen will be non-zero and we go to fixup function scrub_handle_errored_block(). In scrub_handle_errored_block(), for sctx without csum (no matter if we're doing replace or scrub) we go to scrub_fixup_nodatasum() routine, which does the similar thing with copy_nocow_pages(), but does it without the extra check in copy_nocow_pages() routine. So for test cases like btrfs/100, where we emulate read errors during replace/scrub, we could corrupt compressed extent data again. This patch will fix it just by avoiding any "optimization" for nodatasum, just falls back to the normal fixup routine by try read from any good copy. This also solves WARN_ON() or dead lock caused by lame backref iteration in scrub_fixup_nodatasum() routine. The deadlock or WARN_ON() won't be triggered before commit ac0b4145d662 ("btrfs: scrub: Don't use inode pages for device replace") since copy_nocow_pages() have better locking and extra check for data extent, and it's already doing the fixup work by try to read data from any good copy, so it won't go scrub_fixup_nodatasum() anyway. This patch disables the faulty code and will be removed completely in a followup patch. Fixes: ac0b4145d662 ("btrfs: scrub: Don't use inode pages for device replace") Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/scrub.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 936d58ca2b49..61192c536e6c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1166,11 +1166,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) return ret; } - if (sctx->is_dev_replace && !is_metadata && !have_csum) { - sblocks_for_recheck = NULL; - goto nodatasum_case; - } - /* * read all mirrors one after the other. This includes to * re-read the extent or metadata block that failed (that was @@ -1283,13 +1278,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) goto out; } - if (!is_metadata && !have_csum) { + /* + * NOTE: Even for nodatasum case, it's still possible that it's a + * compressed data extent, thus scrub_fixup_nodatasum(), which write + * inode page cache onto disk, could cause serious data corruption. + * + * So here we could only read from disk, and hope our recovery could + * reach disk before the newer write. + */ + if (0 && !is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; WARN_ON(sctx->is_dev_replace); -nodatasum_case: - /* * !is_metadata and !have_csum, this means that the data * might not be COWed, that it might be modified -- GitLab From 70cc6b67508ca7a940f463a1aa12869f1f59fd7a Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 13 Jul 2018 17:31:50 +0200 Subject: [PATCH 1004/1291] octeon_mgmt: Fix MIX registers configuration on MTU setup [ Upstream commit 4aac0b43474d18f6160302a3caa147d77fa3baa1 ] octeon_mgmt driver doesn't drop RX frames that are 1-4 bytes bigger than MTU set for the corresponding interface. The problem is in the AGL_GMX_RX0/1_FRM_MAX register setting, which should not account for VLAN tagging. According to Octeon HW manual: "For tagged frames, MAX increases by four bytes for each VLAN found up to a maximum of two VLANs, or MAX + 8 bytes." OCTEON_FRAME_HEADER_LEN "define" is fine for ring buffer management, but should not be used for AGL_GMX_RX0/1_FRM_MAX. The problem could be easily reproduced using "ping" command. If affected system has default MTU 1500, other host (having MTU >= 1504) can successfully "ping" the affected system with payload size 1473-1476, resulting in IP packets of size 1501-1504 accepted by the mgmt driver. Fixed system still accepts IP packets of 1500 bytes even with VLAN tagging, because the limits are lifted in HW as expected, for every VLAN tag. Signed-off-by: Alexander Sverdlin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 2887bcaf6af5..45c51277e0cf 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -643,13 +643,21 @@ static int octeon_mgmt_set_mac_address(struct net_device *netdev, void *addr) static int octeon_mgmt_change_mtu(struct net_device *netdev, int new_mtu) { struct octeon_mgmt *p = netdev_priv(netdev); - int size_without_fcs = new_mtu + OCTEON_MGMT_RX_HEADROOM; + int max_packet = new_mtu + ETH_HLEN + ETH_FCS_LEN; netdev->mtu = new_mtu; - cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_MAX, size_without_fcs); + /* HW lifts the limit if the frame is VLAN tagged + * (+4 bytes per each tag, up to two tags) + */ + cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_MAX, max_packet); + /* Set the hardware to truncate packets larger than the MTU. The jabber + * register must be set to a multiple of 8 bytes, so round up. JABBER is + * an unconditional limit, so we need to account for two possible VLAN + * tags. + */ cvmx_write_csr(p->agl + AGL_GMX_RX_JABBER, - (size_without_fcs + 7) & 0xfff8); + (max_packet + 7 + VLAN_HLEN * 2) & 0xfff8); return 0; } -- GitLab From 1b1968d8f6ae913f5c0ec55893d324195d234fbe Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 16 Jul 2018 17:58:10 -0500 Subject: [PATCH 1005/1291] net: usb: rtl8150: demote allmulti message to dev_dbg() [ Upstream commit 3a9b0455062ffb9d2f6cd4473a76e3456f318c9f ] This driver can spam the kernel log with multiple messages of: net eth0: eth0: allmulti set Usually 4 or 8 at a time (probably because of using ConnMan). This message doesn't seem useful, so let's demote it from dev_info() to dev_dbg(). Signed-off-by: David Lechner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rtl8150.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 5f565bd574da..48ba80a8ca5c 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -681,7 +681,7 @@ static void rtl8150_set_multicast(struct net_device *netdev) (netdev->flags & IFF_ALLMULTI)) { rx_creg &= 0xfffe; rx_creg |= 0x0002; - dev_info(&netdev->dev, "%s: allmulti set\n", netdev->name); + dev_dbg(&netdev->dev, "%s: allmulti set\n", netdev->name); } else { /* ~RX_MULTICAST, ~RX_PROMISCUOUS */ rx_creg &= 0x00fc; -- GitLab From 114a5608cde4197870d4f217c69f27ea20f9350c Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sat, 14 Jul 2018 23:28:29 +0800 Subject: [PATCH 1006/1291] kvmclock: fix TSC calibration for nested guests [ Upstream commit e10f7805032365cc11c739a97f226ebb48aee042 ] Inside a nested guest, access to hardware can be slow enough that tsc_read_refs always return ULLONG_MAX, causing tsc_refine_calibration_work to be called periodically and the nested guest to spend a lot of time reading the ACPI timer. However, if the TSC frequency is available from the pvclock page, we can just set X86_FEATURE_TSC_KNOWN_FREQ and avoid the recalibration. 'refine' operation. Suggested-by: Peter Zijlstra Signed-off-by: Peng Hao [Commit message rewritten. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 5b609e28ce3f..48703d430a2f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -143,6 +143,7 @@ static unsigned long kvm_get_tsc_khz(void) src = &hv_clock[cpu].pvti; tsc_khz = pvclock_tsc_khz(src); put_cpu(); + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); return tsc_khz; } -- GitLab From 0e66392d985c4595969f69350d8566d292ea12e7 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 18 Jul 2018 15:40:26 -0500 Subject: [PATCH 1007/1291] PCI: OF: Fix I/O space page leak commit a5fb9fb023a1435f2b42bccd7f547560f3a21dc3 upstream. When testing the R-Car PCIe driver on the Condor board, if the PCIe PHY driver was left disabled, the kernel crashed with this BUG: kernel BUG at lib/ioremap.c:72! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 39 Comm: kworker/0:1 Not tainted 4.17.0-dirty #1092 Hardware name: Renesas Condor board based on r8a77980 (DT) Workqueue: events deferred_probe_work_func pstate: 80000005 (Nzcv daif -PAN -UAO) pc : ioremap_page_range+0x370/0x3c8 lr : ioremap_page_range+0x40/0x3c8 sp : ffff000008da39e0 x29: ffff000008da39e0 x28: 00e8000000000f07 x27: ffff7dfffee00000 x26: 0140000000000000 x25: ffff7dfffef00000 x24: 00000000000fe100 x23: ffff80007b906000 x22: ffff000008ab8000 x21: ffff000008bb1d58 x20: ffff7dfffef00000 x19: ffff800009c30fb8 x18: 0000000000000001 x17: 00000000000152d0 x16: 00000000014012d0 x15: 0000000000000000 x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 x11: 0720072007300730 x10: 00000000000000ae x9 : 0000000000000000 x8 : ffff7dffff000000 x7 : 0000000000000000 x6 : 0000000000000100 x5 : 0000000000000000 x4 : 000000007b906000 x3 : ffff80007c61a880 x2 : ffff7dfffeefffff x1 : 0000000040000000 x0 : 00e80000fe100f07 Process kworker/0:1 (pid: 39, stack limit = 0x (ptrval)) Call trace: ioremap_page_range+0x370/0x3c8 pci_remap_iospace+0x7c/0xac pci_parse_request_of_pci_ranges+0x13c/0x190 rcar_pcie_probe+0x4c/0xb04 platform_drv_probe+0x50/0xbc driver_probe_device+0x21c/0x308 __device_attach_driver+0x98/0xc8 bus_for_each_drv+0x54/0x94 __device_attach+0xc4/0x12c device_initial_probe+0x10/0x18 bus_probe_device+0x90/0x98 deferred_probe_work_func+0xb0/0x150 process_one_work+0x12c/0x29c worker_thread+0x200/0x3fc kthread+0x108/0x134 ret_from_fork+0x10/0x18 Code: f9004ba2 54000080 aa0003fb 17ffff48 (d4210000) It turned out that pci_remap_iospace() wasn't undone when the driver's probe failed, and since devm_phy_optional_get() returned -EPROBE_DEFER, the probe was retried, finally causing the BUG due to trying to remap already remapped pages. Introduce the devm_pci_remap_iospace() managed API and replace the pci_remap_iospace() call with it to fix the bug. Fixes: dbf9826d5797 ("PCI: generic: Convert to DT resource parsing API") Signed-off-by: Sergei Shtylyov [lorenzo.pieralisi@arm.com: split commit/updated the commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-host-common.c | 2 +- drivers/pci/host/pcie-rcar.c | 2 +- drivers/pci/pci.c | 38 ++++++++++++++++++++++++++++++ include/linux/pci.h | 2 ++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-host-common.c b/drivers/pci/host/pci-host-common.c index 44a47d4f0b8f..148896f73c06 100644 --- a/drivers/pci/host/pci-host-common.c +++ b/drivers/pci/host/pci-host-common.c @@ -45,7 +45,7 @@ static int gen_pci_parse_request_of_pci_ranges(struct device *dev, switch (resource_type(res)) { case IORESOURCE_IO: - err = pci_remap_iospace(res, iobase); + err = devm_pci_remap_iospace(dev, res, iobase); if (err) { dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index 8f44a7d14bff..41edce16a07c 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -1105,7 +1105,7 @@ static int rcar_pcie_parse_request_of_pci_ranges(struct rcar_pcie *pci) struct resource *res = win->res; if (resource_type(res) == IORESOURCE_IO) { - err = pci_remap_iospace(res, iobase); + err = devm_pci_remap_iospace(dev, res, iobase); if (err) { dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 62a0677b32f1..22924629e64a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3446,6 +3446,44 @@ void pci_unmap_iospace(struct resource *res) } EXPORT_SYMBOL(pci_unmap_iospace); +static void devm_pci_unmap_iospace(struct device *dev, void *ptr) +{ + struct resource **res = ptr; + + pci_unmap_iospace(*res); +} + +/** + * devm_pci_remap_iospace - Managed pci_remap_iospace() + * @dev: Generic device to remap IO address for + * @res: Resource describing the I/O space + * @phys_addr: physical address of range to be mapped + * + * Managed pci_remap_iospace(). Map is automatically unmapped on driver + * detach. + */ +int devm_pci_remap_iospace(struct device *dev, const struct resource *res, + phys_addr_t phys_addr) +{ + const struct resource **ptr; + int error; + + ptr = devres_alloc(devm_pci_unmap_iospace, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + error = pci_remap_iospace(res, phys_addr); + if (error) { + devres_free(ptr); + } else { + *ptr = res; + devres_add(dev, ptr); + } + + return error; +} +EXPORT_SYMBOL(devm_pci_remap_iospace); + /** * devm_pci_remap_cfgspace - Managed pci_remap_cfgspace() * @dev: Generic device to remap IO address for diff --git a/include/linux/pci.h b/include/linux/pci.h index 727e309baa5e..9d6fae809c09 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1235,6 +1235,8 @@ int pci_register_io_range(phys_addr_t addr, resource_size_t size); unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); +int devm_pci_remap_iospace(struct device *dev, const struct resource *res, + phys_addr_t phys_addr); void pci_unmap_iospace(struct resource *res); void __iomem *devm_pci_remap_cfgspace(struct device *dev, resource_size_t offset, -- GitLab From 96dfb7652e172b0ea77124928753904b64bc0261 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 18 Jul 2018 15:40:40 -0500 Subject: [PATCH 1008/1291] PCI: versatile: Fix I/O space page leak [ Upstream commit 0018b265adf7e251f90d3ca1c7c0e32e2a0ad262 ] When testing the R-Car PCIe driver on the Condor board, if the PCIe PHY driver was left disabled, the kernel crashed with this BUG: kernel BUG at lib/ioremap.c:72! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 39 Comm: kworker/0:1 Not tainted 4.17.0-dirty #1092 Hardware name: Renesas Condor board based on r8a77980 (DT) Workqueue: events deferred_probe_work_func pstate: 80000005 (Nzcv daif -PAN -UAO) pc : ioremap_page_range+0x370/0x3c8 lr : ioremap_page_range+0x40/0x3c8 sp : ffff000008da39e0 x29: ffff000008da39e0 x28: 00e8000000000f07 x27: ffff7dfffee00000 x26: 0140000000000000 x25: ffff7dfffef00000 x24: 00000000000fe100 x23: ffff80007b906000 x22: ffff000008ab8000 x21: ffff000008bb1d58 x20: ffff7dfffef00000 x19: ffff800009c30fb8 x18: 0000000000000001 x17: 00000000000152d0 x16: 00000000014012d0 x15: 0000000000000000 x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 x11: 0720072007300730 x10: 00000000000000ae x9 : 0000000000000000 x8 : ffff7dffff000000 x7 : 0000000000000000 x6 : 0000000000000100 x5 : 0000000000000000 x4 : 000000007b906000 x3 : ffff80007c61a880 x2 : ffff7dfffeefffff x1 : 0000000040000000 x0 : 00e80000fe100f07 Process kworker/0:1 (pid: 39, stack limit = 0x (ptrval)) Call trace: ioremap_page_range+0x370/0x3c8 pci_remap_iospace+0x7c/0xac pci_parse_request_of_pci_ranges+0x13c/0x190 rcar_pcie_probe+0x4c/0xb04 platform_drv_probe+0x50/0xbc driver_probe_device+0x21c/0x308 __device_attach_driver+0x98/0xc8 bus_for_each_drv+0x54/0x94 __device_attach+0xc4/0x12c device_initial_probe+0x10/0x18 bus_probe_device+0x90/0x98 deferred_probe_work_func+0xb0/0x150 process_one_work+0x12c/0x29c worker_thread+0x200/0x3fc kthread+0x108/0x134 ret_from_fork+0x10/0x18 Code: f9004ba2 54000080 aa0003fb 17ffff48 (d4210000) It turned out that pci_remap_iospace() wasn't undone when the driver's probe failed, and since devm_phy_optional_get() returned -EPROBE_DEFER, the probe was retried, finally causing the BUG due to trying to remap already remapped pages. The Versatile PCI controller driver has the same issue. Replace pci_remap_iospace() with the devm_ managed version to fix the bug. Fixes: b7e78170efd4 ("PCI: versatile: Add DT-based ARM Versatile PB PCIe host driver") Signed-off-by: Sergei Shtylyov [lorenzo.pieralisi@arm.com: updated the commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-versatile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-versatile.c b/drivers/pci/host/pci-versatile.c index d417acab0ecf..aff4cfb555fb 100644 --- a/drivers/pci/host/pci-versatile.c +++ b/drivers/pci/host/pci-versatile.c @@ -89,7 +89,7 @@ static int versatile_pci_parse_request_of_pci_ranges(struct device *dev, switch (resource_type(res)) { case IORESOURCE_IO: - err = pci_remap_iospace(res, iobase); + err = devm_pci_remap_iospace(dev, res, iobase); if (err) { dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); -- GitLab From f600a10d05da9d88d4385053997fb7898ed1eca7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:43 +0200 Subject: [PATCH 1009/1291] net: qca_spi: Avoid packet drop during initial sync [ Upstream commit b2bab426dc715de147f8039a3fccff27d795f4eb ] As long as the synchronization with the QCA7000 isn't finished, we cannot accept packets from the upper layers. So let the SPI thread enable the TX queue after sync and avoid unwanted packet drop. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 9c236298fe21..bffcf4986539 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -658,7 +658,7 @@ qcaspi_netdev_open(struct net_device *dev) return ret; } - netif_start_queue(qca->net_dev); + /* SPI thread takes care of TX queue */ return 0; } -- GitLab From 7f117d7000ee4c232032212dc9e74c59faa7ab81 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:44 +0200 Subject: [PATCH 1010/1291] net: qca_spi: Make sure the QCA7000 reset is triggered [ Upstream commit 711c62dfa6bdb4326ca6c587f295ea5c4f7269de ] In case the SPI thread is not running, a simple reset of sync state won't fix the transmit timeout. We also need to wake up the kernel thread. Signed-off-by: Stefan Wahren Fixes: ed7d42e24eff ("net: qca_spi: fix transmit queue timeout handling") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index bffcf4986539..fb348e044b4e 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -761,6 +761,9 @@ qcaspi_netdev_tx_timeout(struct net_device *dev) qca->net_dev->stats.tx_errors++; /* Trigger tx queue flush and QCA7000 reset */ qca->sync = QCASPI_SYNC_UNKNOWN; + + if (qca->spi_thread) + wake_up_process(qca->spi_thread); } static int -- GitLab From e86aaeb8a63015d321acb6bce2ce0358d794cfa3 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:45 +0200 Subject: [PATCH 1011/1291] net: qca_spi: Fix log level if probe fails [ Upstream commit 50973993260a6934f0a00da53d9b746cfbea89ab ] In cases the probing fails the log level of the messages should be an error. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index fb348e044b4e..b1f5f0b8e546 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -882,22 +882,22 @@ qca_spi_probe(struct spi_device *spi) if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { - dev_info(&spi->dev, "Invalid clkspeed: %d\n", - qcaspi_clkspeed); + dev_err(&spi->dev, "Invalid clkspeed: %d\n", + qcaspi_clkspeed); return -EINVAL; } if ((qcaspi_burst_len < QCASPI_BURST_LEN_MIN) || (qcaspi_burst_len > QCASPI_BURST_LEN_MAX)) { - dev_info(&spi->dev, "Invalid burst len: %d\n", - qcaspi_burst_len); + dev_err(&spi->dev, "Invalid burst len: %d\n", + qcaspi_burst_len); return -EINVAL; } if ((qcaspi_pluggable < QCASPI_PLUGGABLE_MIN) || (qcaspi_pluggable > QCASPI_PLUGGABLE_MAX)) { - dev_info(&spi->dev, "Invalid pluggable: %d\n", - qcaspi_pluggable); + dev_err(&spi->dev, "Invalid pluggable: %d\n", + qcaspi_pluggable); return -EINVAL; } @@ -959,8 +959,8 @@ qca_spi_probe(struct spi_device *spi) } if (register_netdev(qcaspi_devs)) { - dev_info(&spi->dev, "Unable to register net device %s\n", - qcaspi_devs->name); + dev_err(&spi->dev, "Unable to register net device %s\n", + qcaspi_devs->name); free_netdev(qcaspi_devs); return -EFAULT; } -- GitLab From 90e7d66508e3db1c1de55b6e196597edb37940b3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Jul 2018 18:27:45 -0700 Subject: [PATCH 1012/1291] tcp: identify cryptic messages as TCP seq # bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e56b8ce363a36fb7b74b80aaa5cc9084f2c908b4 ] Attempt to make cryptic TCP seq number error messages clearer by (1) identifying the source of the message as "TCP", (2) identifying the errors as "seq # bug", and (3) grouping the field identifiers and values by separating them with commas. E.g., the following message is changed from: recvmsg bug 2: copied 73BCB6CD seq 70F17CBE rcvnxt 73BCB9AA fl 0 WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:1881 tcp_recvmsg+0x649/0xb90 to: TCP recvmsg seq # bug 2: copied 73BCB6CD, seq 70F17CBE, rcvnxt 73BCB9AA, fl 0 WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:2011 tcp_recvmsg+0x694/0xba0 Suggested-by: 積丹尼 Dan Jacobson Signed-off-by: Randy Dunlap Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e81ff9d545a4..7462ec7587ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1837,7 +1837,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags)) break; @@ -1852,7 +1852,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; WARN(!(flags & MSG_PEEK), - "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } -- GitLab From 715f5f92aabfa17ab4b7a39cf67b67d6d6eef68b Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 17 Jul 2018 11:28:46 +0800 Subject: [PATCH 1013/1291] soc: imx: gpc: restrict register range for regmap access [ Upstream commit de2d9b5284bcb5c159c5882ac69f6bfd4dec7c67 ] GPC registers are NOT continuous, some registers are reserved and accessing them from userspace will trigger external abort, add regmap register access table to avoid below abort: root@imx6slevk:~# cat /sys/kernel/debug/regmap/20dc000.gpc/registers [ 108.480477] Unhandled fault: imprecise external abort (0x1406) at 0xb6db5004 [ 108.487985] pgd = 42b54bfd [ 108.490741] [b6db5004] *pgd=ba1b7831 [ 108.494386] Internal error: : 1406 [#1] SMP ARM [ 108.498943] Modules linked in: [ 108.502043] CPU: 0 PID: 389 Comm: cat Not tainted 4.18.0-rc1-00074-gc9f1f60-dirty #482 [ 108.509982] Hardware name: Freescale i.MX6 SoloLite (Device Tree) [ 108.516123] PC is at regmap_mmio_read32le+0x20/0x24 [ 108.521031] LR is at regmap_mmio_read+0x40/0x60 [ 108.525586] pc : [] lr : [] psr: 20060093 [ 108.531875] sp : eccf1d98 ip : eccf1da8 fp : eccf1da4 [ 108.537122] r10: ec2d3800 r9 : eccf1f60 r8 : ecfc0000 [ 108.542370] r7 : eccf1e2c r6 : eccf1e2c r5 : 00000028 r4 : ec338e00 [ 108.548920] r3 : 00000000 r2 : eccf1e2c r1 : f0980028 r0 : 00000000 [ 108.555474] Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none [ 108.562720] Control: 10c5387d Table: acf4004a DAC: 00000051 [ 108.568491] Process cat (pid: 389, stack limit = 0xd4318a65) [ 108.574174] Stack: (0xeccf1d98 to 0xeccf2000) Fixes: 721cabf6c660 ("soc: imx: move PGC handling to a new GPC driver") Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/soc/imx/gpc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 1613ccf0c059..c54d229f8da4 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -27,9 +27,16 @@ #define GPC_PGC_SW2ISO_SHIFT 0x8 #define GPC_PGC_SW_SHIFT 0x0 +#define GPC_PGC_PCI_PDN 0x200 +#define GPC_PGC_PCI_SR 0x20c + #define GPC_PGC_GPU_PDN 0x260 #define GPC_PGC_GPU_PUPSCR 0x264 #define GPC_PGC_GPU_PDNSCR 0x268 +#define GPC_PGC_GPU_SR 0x26c + +#define GPC_PGC_DISP_PDN 0x240 +#define GPC_PGC_DISP_SR 0x24c #define GPU_VPU_PUP_REQ BIT(1) #define GPU_VPU_PDN_REQ BIT(0) @@ -303,10 +310,24 @@ static const struct of_device_id imx_gpc_dt_ids[] = { { } }; +static const struct regmap_range yes_ranges[] = { + regmap_reg_range(GPC_CNTR, GPC_CNTR), + regmap_reg_range(GPC_PGC_PCI_PDN, GPC_PGC_PCI_SR), + regmap_reg_range(GPC_PGC_GPU_PDN, GPC_PGC_GPU_SR), + regmap_reg_range(GPC_PGC_DISP_PDN, GPC_PGC_DISP_SR), +}; + +static const struct regmap_access_table access_table = { + .yes_ranges = yes_ranges, + .n_yes_ranges = ARRAY_SIZE(yes_ranges), +}; + static const struct regmap_config imx_gpc_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, + .rd_table = &access_table, + .wr_table = &access_table, .max_register = 0x2ac, }; -- GitLab From 119970f4775ba44e46545df5c3237ee8721dc1de Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Fri, 13 Jul 2018 20:50:47 +0000 Subject: [PATCH 1014/1291] ACPI / EC: Use ec_no_wakeup on more Thinkpad X1 Carbon 6th systems [ Upstream commit 2c4d6baf1bc4f7729773ffcee9ba2a9781578633 ] The ec_no_wakeup matcher added for Thinkpad X1 Carbon 6th gen systems beyond matched only a single DMI model (20KGS3JF01), that didn't cover my laptop (20KH002JUS). Change to match based on DMI product family to cover all X1 6th gen systems. Signed-off-by: Robin H. Johnson Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 355add64681b..3d624c72c6c2 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2034,7 +2034,7 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = { .ident = "Thinkpad X1 Carbon 6th", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "20KGS3JF01"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Thinkpad X1 Carbon 6th"), }, }, { }, -- GitLab From 0ee106583f9c6fe2eccb5dbb9eb9562abbcba7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 19 Jul 2018 21:38:23 +0200 Subject: [PATCH 1015/1291] ARM: dts: imx6: RDU2: fix irq type for mv88e6xxx switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e01a06c8089132bb4da035c6a83df23916ca3ebf ] The Marvell switches report their interrupts in a level sensitive way. When using edge sensitive detection a race condition in the interrupt handler of the swich might result in the OS to miss all future events which might make the switch non-functional. The problem is that both mv88e6xxx_g2_irq_thread_fn() and mv88e6xxx_g1_irq_thread_work() sample the irq cause register (MV88E6XXX_G2_INT_SRC and MV88E6XXX_G1_STS respectively) once and then handle the observed sources. If after sampling but before all observed irq sources are handled a new irq source gets active this is not noticed by the handler which returns unsuspecting, but the interrupt line stays active which prevents the edge detector to kick in. All device trees but imx6qdl-zii-rdu2 get this right (most of them by not specifying an interrupt parent). So fix imx6qdl-zii-rdu2 accordingly. Signed-off-by: Uwe Kleine-König Fixes: f64992d1a916 ("ARM: dts: imx6: RDU2: Add Switch interrupts") Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index eeb7679fd348..849eb3443cde 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -644,7 +644,7 @@ switch: switch@0 { dsa,member = <0 0>; eeprom-length = <512>; interrupt-parent = <&gpio6>; - interrupts = <3 IRQ_TYPE_EDGE_FALLING>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; #interrupt-cells = <2>; -- GitLab From 7a12f4ed07a5f7b49c7d4f86ea67f65ec953cc40 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 19 Jul 2018 20:07:59 -0700 Subject: [PATCH 1016/1291] nvme: fix handling of metadata_len for NVME_IOCTL_IO_CMD [ Upstream commit 9b382768135ee3ff282f828c906574a8478e036b ] The old code in nvme_user_cmd() passed the userspace virtual address from nvme_passthru_cmd.metadata as the length of the metadata buffer as well as the address to nvme_submit_user_cmd(). Fixes: 63263d60 ("nvme: Use metadata for passthrough commands") Signed-off-by: Roland Dreier Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 38c128f230e7..3a63d58d2ca9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1016,7 +1016,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, 0, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) -- GitLab From 270d5d77191769eaa57f94eff3ef8119121a091a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 28 May 2018 13:31:13 +0200 Subject: [PATCH 1017/1291] KVM: irqfd: fix race between EPOLLHUP and irq_bypass_register_consumer commit 9432a3175770e06cb83eada2d91fac90c977cb99 upstream. A comment warning against this bug is there, but the code is not doing what the comment says. Therefore it is possible that an EPOLLHUP races against irq_bypass_register_consumer. The EPOLLHUP handler schedules irqfd_shutdown, and if that runs soon enough, you get a use-after-free. Reported-by: syzbot Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 58a9b31b0dd5..088734a700e9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -405,11 +405,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (events & POLLIN) schedule_work(&irqfd->inject); - /* - * do not drop the file until the irqfd is fully initialized, otherwise - * we might race against the POLLHUP - */ - fdput(f); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS if (kvm_arch_has_irq_bypass()) { irqfd->consumer.token = (void *)irqfd->eventfd; @@ -425,6 +420,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) #endif srcu_read_unlock(&kvm->irq_srcu, idx); + + /* + * do not drop the file until the irqfd is fully initialized, otherwise + * we might race against the POLLHUP + */ + fdput(f); return 0; fail: -- GitLab From 2ab95e71c8a592cb798e0059dd26a900c6e290a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Dec 2017 19:16:34 -0800 Subject: [PATCH 1018/1291] f2fs: return error during fill_super commit c39a1b348c4fe172729eff77c533dabc3c7cdaa7 upstream. Let's avoid BUG_ON during fill_super, when on-disk was totall corrupted. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 16 ++++++++++++---- fs/f2fs/segment.h | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7c05bd4222b2..f05cbe99f033 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3240,7 +3240,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) return restore_curseg_summaries(sbi); } -static void build_sit_entries(struct f2fs_sb_info *sbi) +static int build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); @@ -3250,6 +3250,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; + int err = 0; do { readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -3268,7 +3269,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + return err; seg_info_from_raw_sit(se, &sit); /* build discard map only one time */ @@ -3303,7 +3306,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) old_valid_blocks = se->valid_blocks; - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + break; seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { @@ -3323,6 +3328,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks - old_valid_blocks; } up_read(&curseg->journal_rwsem); + return err; } static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -3492,7 +3498,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) return err; /* reinit free segmap based on SIT */ - build_sit_entries(sbi); + err = build_sit_entries(sbi); + if (err) + return err; init_free_segmap(sbi); err = build_dirty_segmap(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e0a6cc23ace3..39ada30889b6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -625,7 +625,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) /* * Summary block is always treated as an invalid block */ -static inline void check_block_count(struct f2fs_sb_info *sbi, +static inline int check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { #ifdef CONFIG_F2FS_CHECK_FS @@ -647,11 +647,25 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, cur_pos = next_pos; is_valid = !is_valid; } while (cur_pos < sbi->blocks_per_seg); - BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); + + if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Mismatch valid blocks %d vs. %d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EINVAL; + } #endif /* check segment usage, and check boundary of a given segment number */ - f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg - || segno > TOTAL_SEGS(sbi) - 1); + if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg + || segno > TOTAL_SEGS(sbi) - 1)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EINVAL; + } + return 0; } static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, -- GitLab From deebf1d61e707f5a0f29a76cbb54ab333869d01b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Apr 2018 21:34:05 -0600 Subject: [PATCH 1019/1291] f2fs: sanity check for total valid node blocks commit 8a29c1260e24e7c9c6ab138aa0017558d8b28208 upstream. This patch enhances sanity check for SIT entries. syzbot hit the following crash on upstream commit 83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +0000) Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=bf9253040425feb155ad syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5692130282438656 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5095924598571008 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+bf9253040425feb155ad@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): invalid crc value F2FS-fs (loop0): Try to recover 1th superblock, ret: 0 F2FS-fs (loop0): Mounted with checkpoint version = d F2FS-fs (loop0): Bitmap was wrongly cleared, blk:9740 ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:1884! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4508 Comm: syz-executor0 Not tainted 4.17.0-rc1+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:update_sit_entry+0x1215/0x1590 fs/f2fs/segment.c:1882 RSP: 0018:ffff8801af526708 EFLAGS: 00010282 RAX: ffffed0035ea4cc0 RBX: ffff8801ad454f90 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff82eeb87e RDI: ffffed0035ea4cb6 RBP: ffff8801af526760 R08: ffff8801ad4a2480 R09: ffffed003b5e4f90 R10: ffffed003b5e4f90 R11: ffff8801daf27c87 R12: ffff8801adb8d380 R13: 0000000000000001 R14: 0000000000000008 R15: 00000000ffffffff FS: 00000000014af940(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f06bc223000 CR3: 00000001adb02000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: allocate_data_block+0x66f/0x2050 fs/f2fs/segment.c:2663 do_write_page+0x105/0x1b0 fs/f2fs/segment.c:2727 write_node_page+0x129/0x350 fs/f2fs/segment.c:2770 __write_node_page+0x7da/0x1370 fs/f2fs/node.c:1398 sync_node_pages+0x18cf/0x1eb0 fs/f2fs/node.c:1652 block_operations+0x429/0xa60 fs/f2fs/checkpoint.c:1088 write_checkpoint+0x3ba/0x5380 fs/f2fs/checkpoint.c:1405 f2fs_sync_fs+0x2fb/0x6a0 fs/f2fs/super.c:1077 __sync_filesystem fs/sync.c:39 [inline] sync_filesystem+0x265/0x310 fs/sync.c:67 generic_shutdown_super+0xd7/0x520 fs/super.c:429 kill_block_super+0xa4/0x100 fs/super.c:1191 kill_f2fs_super+0x9f/0xd0 fs/f2fs/super.c:3030 deactivate_locked_super+0x97/0x100 fs/super.c:316 deactivate_super+0x188/0x1b0 fs/super.c:347 cleanup_mnt+0xbf/0x160 fs/namespace.c:1174 __cleanup_mnt+0x16/0x20 fs/namespace.c:1181 task_work_run+0x1e4/0x290 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:191 [inline] exit_to_usermode_loop+0x2bd/0x310 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] syscall_return_slowpath arch/x86/entry/common.c:265 [inline] do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457d97 RSP: 002b:00007ffd46f9c8e8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000457d97 RDX: 00000000014b09a3 RSI: 0000000000000002 RDI: 00007ffd46f9da50 RBP: 00007ffd46f9da50 R08: 0000000000000000 R09: 0000000000000009 R10: 0000000000000005 R11: 0000000000000246 R12: 00000000014b0940 R13: 0000000000000000 R14: 0000000000000002 R15: 000000000000658e RIP: update_sit_entry+0x1215/0x1590 fs/f2fs/segment.c:1882 RSP: ffff8801af526708 ---[ end trace f498328bb02610a2 ]--- Reported-and-tested-by: syzbot+bf9253040425feb155ad@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+7d6d31d3bc702f566ce3@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+0a725420475916460f12@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f05cbe99f033..3c7bbbae0afa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3251,6 +3251,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; + block_t total_node_blocks = 0; do { readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -3273,6 +3274,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (err) return err; seg_info_from_raw_sit(se, &sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; /* build discard map only one time */ if (f2fs_discard_en(sbi)) { @@ -3305,11 +3308,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; + if (IS_NODESEG(se->type)) + total_node_blocks -= old_valid_blocks; err = check_block_count(sbi, start, &sit); if (err) break; seg_info_from_raw_sit(se, &sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; if (f2fs_discard_en(sbi)) { if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { @@ -3328,6 +3335,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks - old_valid_blocks; } up_read(&curseg->journal_rwsem); + + if (!err && total_node_blocks != valid_node_count(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, + "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + } + return err; } -- GitLab From fdb441da3858ce0523099f466b8433a1ea055865 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 12 Aug 2018 16:38:03 -0400 Subject: [PATCH 1020/1291] parisc: Remove ordered stores from syscall.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7797167ffde1f00446301cb22b37b7c03194cfaf upstream. Now that we use a sync prior to releasing the locks in syscall.S, we don't need the PA 2.0 ordered stores used to release some locks.  Using an ordered store, potentially slows the release and subsequent code. There are a number of other ordered stores and loads that serve no purpose.  I have converted these to normal stores. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 4886a6db42e9..5f7e57fcaeef 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -629,12 +629,12 @@ cas_action: stw %r1, 4(%sr2,%r20) #endif /* The load and store could fail */ -1: ldw,ma 0(%r26), %r28 +1: ldw 0(%r26), %r28 sub,<> %r28, %r25, %r0 -2: stw,ma %r24, 0(%r26) +2: stw %r24, 0(%r26) /* Free lock */ sync - stw,ma %r20, 0(%sr2,%r20) + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -798,30 +798,30 @@ cas2_action: ldo 1(%r0),%r28 /* 8bit CAS */ -13: ldb,ma 0(%r26), %r29 +13: ldb 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -14: stb,ma %r24, 0(%r26) +14: stb %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 16bit CAS */ -15: ldh,ma 0(%r26), %r29 +15: ldh 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -16: sth,ma %r24, 0(%r26) +16: sth %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 32bit CAS */ -17: ldw,ma 0(%r26), %r29 +17: ldw 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -18: stw,ma %r24, 0(%r26) +18: stw %r24, 0(%r26) b cas2_end copy %r0, %r28 nop @@ -829,10 +829,10 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT -19: ldd,ma 0(%r26), %r29 +19: ldd 0(%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end -20: std,ma %r24, 0(%r26) +20: std %r24, 0(%r26) copy %r0, %r28 #else /* Compare first word */ @@ -851,7 +851,7 @@ cas2_action: cas2_end: /* Free lock */ sync - stw,ma %r20, 0(%sr2,%r20) + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ -- GitLab From caf3d4bd62cc11a055dce8a19e87a9f33b5e4bca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Jun 2018 21:35:07 -0700 Subject: [PATCH 1021/1291] xfrm_user: prevent leaking 2 bytes of kernel memory commit 45c180bc29babbedd6b8c01b975780ef44d9d09c upstream. struct xfrm_userpolicy_type has two holes, so we should not use C99 style initializer. KMSAN report: BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:140 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 CPU: 1 PID: 4520 Comm: syz-executor841 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1211 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253 copyout lib/iov_iter.c:140 [inline] _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 copy_to_iter include/linux/uio.h:106 [inline] skb_copy_datagram_iter+0x422/0xfa0 net/core/datagram.c:431 skb_copy_datagram_msg include/linux/skbuff.h:3268 [inline] netlink_recvmsg+0x6f1/0x1900 net/netlink/af_netlink.c:1959 sock_recvmsg_nosec net/socket.c:802 [inline] sock_recvmsg+0x1d6/0x230 net/socket.c:809 ___sys_recvmsg+0x3fe/0x810 net/socket.c:2279 __sys_recvmmsg+0x58e/0xe30 net/socket.c:2391 do_sys_recvmmsg+0x2a6/0x3e0 net/socket.c:2472 __do_sys_recvmmsg net/socket.c:2485 [inline] __se_sys_recvmmsg net/socket.c:2481 [inline] __x64_sys_recvmmsg+0x15d/0x1c0 net/socket.c:2481 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x446ce9 RSP: 002b:00007fc307918db8 EFLAGS: 00000293 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dbc24 RCX: 0000000000446ce9 RDX: 000000000000000a RSI: 0000000020005040 RDI: 0000000000000003 RBP: 00000000006dbc20 R08: 0000000020004e40 R09: 0000000000000000 R10: 0000000040000000 R11: 0000000000000293 R12: 0000000000000000 R13: 00007ffc8d2df32f R14: 00007fc3079199c0 R15: 0000000000000001 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_save_stack mm/kmsan/kmsan.c:294 [inline] kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685 kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:413 __nla_put lib/nlattr.c:569 [inline] nla_put+0x276/0x340 lib/nlattr.c:627 copy_to_user_policy_type net/xfrm/xfrm_user.c:1678 [inline] dump_one_policy+0xbe1/0x1090 net/xfrm/xfrm_user.c:1708 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 xfrm_dump_policy+0x1c0/0x2a0 net/xfrm/xfrm_user.c:1749 netlink_dump+0x9b5/0x1550 net/netlink/af_netlink.c:2226 __netlink_dump_start+0x1131/0x1270 net/netlink/af_netlink.c:2323 netlink_dump_start include/linux/netlink.h:214 [inline] xfrm_user_rcv_msg+0x8a3/0x9b0 net/xfrm/xfrm_user.c:2577 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 xfrm_netlink_rcv+0xb2/0xf0 net/xfrm/xfrm_user.c:2598 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Local variable description: ----upt.i@dump_one_policy Variable was created at: dump_one_policy+0x78/0x1090 net/xfrm/xfrm_user.c:1689 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 Byte 130 of 137 is uninitialized Memory access starts at ffff88019550407f Fixes: c0144beaeca42 ("[XFRM] netlink: Use nla_put()/NLA_PUT() variantes") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dbfcfefd6d69..dde40f995ac0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1665,9 +1665,11 @@ static inline size_t userpolicy_type_attrsize(void) #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt = { - .type = type, - }; + struct xfrm_userpolicy_type upt; + + /* Sadly there are two holes in struct xfrm_userpolicy_type */ + memset(&upt, 0, sizeof(upt)); + upt.type = type; return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } -- GitLab From c764f22b2fc39fbc621493f3c43bf120eaf2dbc7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Jul 2018 21:03:15 +0200 Subject: [PATCH 1022/1291] netfilter: conntrack: dccp: treat SYNC/SYNCACK as invalid if no prior state commit 6613b6173dee098997229caf1f3b961c49da75e6 upstream. When first DCCP packet is SYNC or SYNCACK, we insert a new conntrack that has an un-initialized timeout value, i.e. such entry could be reaped at any time. Mark them as INVALID and only ignore SYNC/SYNCACK when connection had an old state. Reported-by: syzbot+6f18401420df260e37ed@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_proto_dccp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 0f5a4d79f6b8..812de5496b37 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -243,14 +243,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, [CT_DCCP_ROLE_SERVER] = { @@ -371,14 +371,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, }; -- GitLab From 9e9f27e0d7ac938a78e603ba1a2a2bdab034394b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 6 Aug 2018 10:38:34 -0400 Subject: [PATCH 1023/1291] packet: refine ring v3 block size test to hold one frame commit 4576cd469d980317c4edd9173f8b694aa71ea3a3 upstream. TPACKET_V3 stores variable length frames in fixed length blocks. Blocks must be able to store a block header, optional private space and at least one minimum sized frame. Frames, even for a zero snaplen packet, store metadata headers and optional reserved space. In the block size bounds check, ensure that the frame of the chosen configuration fits. This includes sockaddr_ll and optional tp_reserve. Syzbot was able to construct a ring with insuffient room for the sockaddr_ll in the header of a zero-length frame, triggering an out-of-bounds write in dev_parse_header. Convert the comparison to less than, as zero is a valid snap len. This matches the test for minimum tp_frame_size immediately below. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Fixes: eb73190f4fbe ("net/packet: refine check for priv area size") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 409b288c13d1..8833a58ca3ee 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4269,6 +4269,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (req->tp_block_nr) { + unsigned int min_frame_size; + /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) @@ -4291,12 +4293,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; + min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && - req->tp_block_size <= - BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) + req->tp_block_size < + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; - if (unlikely(req->tp_frame_size < po->tp_hdrlen + - po->tp_reserve)) + if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; -- GitLab From 3f59cf41fbfb622878c53815b9f464fe5aeaf7d9 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 8 Aug 2018 14:13:19 +0200 Subject: [PATCH 1024/1291] net/smc: no shutdown in state SMC_LISTEN commit caa21e19e08d7a1445116a93f7ab4e187ebbbadb upstream. Invoking shutdown for a socket in state SMC_LISTEN does not make sense. Nevertheless programs like syzbot fuzzing the kernel may try to do this. For SMC this means a socket refcounting problem. This patch makes sure a shutdown call for an SMC socket in state SMC_LISTEN simply returns with -ENOTCONN. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/af_smc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 654a81238406..43ef7be69428 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1180,8 +1180,7 @@ static int smc_shutdown(struct socket *sock, int how) lock_sock(sk); rc = -ENOTCONN; - if ((sk->sk_state != SMC_LISTEN) && - (sk->sk_state != SMC_ACTIVE) && + if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_PEERCLOSEWAIT1) && (sk->sk_state != SMC_PEERCLOSEWAIT2) && (sk->sk_state != SMC_APPCLOSEWAIT1) && -- GitLab From 6541d98d114ef4ccf923c02ae665262450b3966c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 12 Aug 2018 16:31:17 -0400 Subject: [PATCH 1025/1291] parisc: Remove unnecessary barriers from spinlock.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 upstream. Now that mb() is an instruction barrier, it will slow performance if we issue unnecessary barriers. The spinlock defines have a number of unnecessary barriers.  The __ldcw() define is both a hardware and compiler barrier.  The mb() barriers in the routines using __ldcw() serve no purpose. The only barrier needed is the one in arch_spin_unlock().  We need to ensure all accesses are complete prior to releasing the lock. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/spinlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index af03359e6ac5..a82776592c8e 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -20,7 +20,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x, { volatile unsigned int *a; - mb(); a = __ldcw_align(x); while (__ldcw(a) == 0) while (*a == 0) @@ -30,16 +29,15 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x, local_irq_disable(); } else cpu_relax(); - mb(); } static inline void arch_spin_unlock(arch_spinlock_t *x) { volatile unsigned int *a; - mb(); + a = __ldcw_align(x); - *a = 1; mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) @@ -47,10 +45,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x) volatile unsigned int *a; int ret; - mb(); a = __ldcw_align(x); ret = __ldcw(a) != 0; - mb(); return ret; } -- GitLab From cf9c2bc5eea03bd96b08b8cbd1c8adfed4fa617f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:31 -0500 Subject: [PATCH 1026/1291] PCI: hotplug: Don't leak pci_slot on registration failure commit 4ce6435820d1f1cc2c2788e232735eb244bcc8a3 upstream. If addition of sysfs files fails on registration of a hotplug slot, the struct pci_slot as well as the entry in the slot_list is leaked. The issue has been present since the hotplug core was introduced in 2002: https://git.kernel.org/tglx/history/c/a8a2069f432c Perhaps the idea was that even though sysfs addition fails, the slot should still be usable. But that's not how drivers use the interface, they abort probe if a non-zero value is returned. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.4.15+ Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pci_hotplug_core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 7b0e97be9063..591f2e05ab1c 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -452,8 +452,17 @@ int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, list_add(&slot->slot_list, &pci_hotplug_slot_list); result = fs_add_slot(pci_slot); + if (result) + goto err_list_del; + kobject_uevent(&pci_slot->kobj, KOBJ_ADD); dbg("Added slot %s to the list\n", name); + goto out; + +err_list_del: + list_del(&slot->slot_list); + pci_slot->hotplug = NULL; + pci_destroy_slot(pci_slot); out: mutex_unlock(&pci_hp_mutex); return result; -- GitLab From 8af37982c3b281af5ee5b3cec8085f2706973357 Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Mon, 13 Aug 2018 12:19:39 -0600 Subject: [PATCH 1027/1291] PCI: Skip MPS logic for Virtual Functions (VFs) commit 3dbe97efe8bf450b183d6dee2305cbc032e6b8a4 upstream. PCIe r4.0, sec 9.3.5.4, "Device Control Register", shows both Max_Payload_Size (MPS) and Max_Read_request_Size (MRRS) to be 'RsvdP' for VFs. Just prior to the table it states: "PF and VF functionality is defined in Section 7.5.3.4 except where noted in Table 9-16. For VF fields marked 'RsvdP', the PF setting applies to the VF." All of which implies that with respect to Max_Payload_Size Supported (MPSS), MPS, and MRRS values, we should not be paying any attention to the VF's fields, but rather only to the PF's. Only looking at the PF's fields also logically makes sense as it's the sole physical interface to the PCIe bus. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200527 Fixes: 27d868b5e6cf ("PCI: Set MPS to match upstream bridge") Signed-off-by: Myron Stowe Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.3+ Cc: Keith Busch Cc: Sinan Kaya Cc: Dongdong Liu Cc: Jon Mason Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4bccaf688aad..e23bfd9845b1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1560,6 +1560,10 @@ static void pci_configure_mps(struct pci_dev *dev) if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge)) return; + /* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */ + if (dev->is_virtfn) + return; + mps = pcie_get_mps(dev); p_mps = pcie_get_mps(bridge); -- GitLab From 2f27dfad05eff15b251fc012829dda9841e79377 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:32 -0500 Subject: [PATCH 1028/1291] PCI: pciehp: Fix use-after-free on unplug commit 281e878eab191cce4259abbbf1a0322e3adae02c upstream. When pciehp is unbound (e.g. on unplug of a Thunderbolt device), the hotplug_slot struct is deregistered and thus freed before freeing the IRQ. The IRQ handler and the work items it schedules print the slot name referenced from the freed structure in various informational and debug log messages, each time resulting in a quadruple dereference of freed pointers (hotplug_slot -> pci_slot -> kobject -> name). At best the slot name is logged as "(null)", at worst kernel memory is exposed in logs or the driver crashes: pciehp 0000:10:00.0:pcie204: Slot((null)): Card not present An attacker may provoke the bug by unplugging multiple devices on a Thunderbolt daisy chain at once. Unplugging can also be simulated by powering down slots via sysfs. The bug is particularly easy to trigger in poll mode. It has been present since the driver's introduction in 2004: https://git.kernel.org/tglx/history/c/c16b4b14d980 Fix by rearranging teardown such that the IRQ is freed first. Run the work items queued by the IRQ handler to completion before freeing the hotplug_slot struct by draining the work queue from the ->release_slot callback which is invoked by pci_hp_deregister(). Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.4 Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_core.c | 7 +++++++ drivers/pci/hotplug/pciehp_hpc.c | 5 ++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index e7d6cfaf3865..9fc4357c3db9 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -132,6 +132,7 @@ int pciehp_unconfigure_device(struct slot *p_slot); void pciehp_queue_pushbutton_work(struct work_struct *work); struct controller *pcie_init(struct pcie_device *dev); int pcie_init_notification(struct controller *ctrl); +void pcie_shutdown_notification(struct controller *ctrl); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); void pcie_reenable_notification(struct controller *ctrl); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 1288289cc85d..c38e392d63e4 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -76,6 +76,12 @@ static int reset_slot(struct hotplug_slot *slot, int probe); */ static void release_slot(struct hotplug_slot *hotplug_slot) { + struct slot *slot = hotplug_slot->private; + + /* queued work needs hotplug_slot name */ + cancel_delayed_work(&slot->work); + drain_workqueue(slot->wq); + kfree(hotplug_slot->ops); kfree(hotplug_slot->info); kfree(hotplug_slot); @@ -278,6 +284,7 @@ static void pciehp_remove(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); + pcie_shutdown_notification(ctrl); cleanup_slot(ctrl); pciehp_release_ctrl(ctrl); } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 46c2ee2caf28..0c6b54a98402 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -789,7 +789,7 @@ int pcie_init_notification(struct controller *ctrl) return 0; } -static void pcie_shutdown_notification(struct controller *ctrl) +void pcie_shutdown_notification(struct controller *ctrl) { if (ctrl->notification_enabled) { pcie_disable_notification(ctrl); @@ -824,7 +824,7 @@ static int pcie_init_slot(struct controller *ctrl) static void pcie_cleanup_slot(struct controller *ctrl) { struct slot *slot = ctrl->slot; - cancel_delayed_work(&slot->work); + destroy_workqueue(slot->wq); kfree(slot); } @@ -912,7 +912,6 @@ struct controller *pcie_init(struct pcie_device *dev) void pciehp_release_ctrl(struct controller *ctrl) { - pcie_shutdown_notification(ctrl); pcie_cleanup_slot(ctrl); kfree(ctrl); } -- GitLab From b5b7417ed9d6d61df318f7b45c5c2ad38bbc09d2 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:34 -0500 Subject: [PATCH 1029/1291] PCI: pciehp: Fix unprotected list iteration in IRQ handler commit 1204e35bedf4e5015cda559ed8c84789a6dae24e upstream. Commit b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") iterates over the devices on a hotplug port's subordinate bus in pciehp's IRQ handler without acquiring pci_bus_sem. It is thus possible for a user to cause a crash by concurrently manipulating the device list, e.g. by disabling slot power via sysfs on a different CPU or by initiating a remove/rescan via sysfs. This can't be fixed by acquiring pci_bus_sem because it may sleep. The simplest fix is to avoid the list iteration altogether and just check the ignore_hotplug flag on the port itself. This works because pci_ignore_hotplug() sets the flag both on the device as well as on its parent bridge. We do lose the ability to print the name of the device blocking hotplug in the debug message, but that's probably bearable. Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp_hpc.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0c6b54a98402..2fa830727362 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -562,8 +562,6 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; struct pci_dev *pdev = ctrl_dev(ctrl); - struct pci_bus *subordinate = pdev->subordinate; - struct pci_dev *dev; struct slot *slot = ctrl->slot; u16 status, events; u8 present; @@ -611,14 +609,9 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) wake_up(&ctrl->queue); } - if (subordinate) { - list_for_each_entry(dev, &subordinate->devices, bus_list) { - if (dev->ignore_hotplug) { - ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n", - events, pci_name(dev)); - return IRQ_HANDLED; - } - } + if (pdev->ignore_hotplug) { + ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events); + return IRQ_HANDLED; } /* Check Attention Button Pressed */ -- GitLab From 415a02d8b65ae48a8a01694de5ea3c574128710b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Aug 2018 12:53:20 +0200 Subject: [PATCH 1030/1291] i2c: core: ACPI: Properly set status byte to 0 for multi-byte writes commit c463a158cb6c5d9a85b7d894cd4f8116e8bd6be0 upstream. acpi_gsb_i2c_write_bytes() returns i2c_transfer()'s return value, which is the number of transfers executed on success, so 1. The ACPI code expects us to store 0 in gsb->status for success, not 1. Specifically this breaks the following code in the Thinkpad 8 DSDT: ECWR = I2CW = ECWR /* \_SB_.I2C1.BAT0.ECWR */ If ((ECST == Zero)) { ECRD = I2CR /* \_SB_.I2C1.I2CR */ } Before this commit we set ECST to 1, causing the read to never happen breaking battery monitoring on the Thinkpad 8. This commit makes acpi_gsb_i2c_write_bytes() return 0 when i2c_transfer() returns 1, so the single write transfer completed successfully, and makes it return -EIO on for other (unexpected) return values >= 0. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-acpi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index a9126b3cda61..847d9bf6744c 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -475,11 +475,16 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, msgs[0].buf = buffer; ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); - if (ret < 0) - dev_err(&client->adapter->dev, "i2c write failed\n"); kfree(buffer); - return ret; + + if (ret < 0) { + dev_err(&client->adapter->dev, "i2c write failed: %d\n", ret); + return ret; + } + + /* 1 transfer must have completed successfully */ + return (ret == 1) ? 0 : -EIO; } static acpi_status -- GitLab From 82c6d4994ba86ca5edec001f690d9cd3e4cd442f Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Thu, 16 Aug 2018 10:43:12 +0200 Subject: [PATCH 1031/1291] i2c: imx: Fix race condition in dma read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bed4ff1ed4d8f2ef5007c5c6ae1b29c5677a3632 upstream. This fixes a race condition, where the DMAEN bit ends up being set after I2C slave has transmitted a byte following the dummy read. When that happens, an interrupt is generated instead, and no DMA request is generated to kickstart the DMA read, and a timeout happens after DMA_TIMEOUT (1 sec). Fixed by setting the DMAEN bit before the dummy read. Signed-off-by: Esben Haabendal Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 75c6b98585ba..b73dd837fb53 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -665,9 +665,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, struct imx_i2c_dma *dma = i2c_imx->dma; struct device *dev = &i2c_imx->adapter.dev; - temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); - temp |= I2CR_DMAEN; - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); dma->chan_using = dma->chan_rx; dma->dma_transfer_dir = DMA_DEV_TO_MEM; @@ -780,6 +777,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; + int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -806,12 +804,14 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo */ if ((msgs->len - 1) || block_data) temp &= ~I2CR_TXAK; + if (use_dma) + temp |= I2CR_DMAEN; imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); /* dummy read */ dev_dbg(&i2c_imx->adapter.dev, "<%s> read data\n", __func__); - if (i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data) + if (use_dma) return i2c_imx_dma_read(i2c_imx, msgs, is_lastmsg); /* read data */ -- GitLab From ef3cf941e0dfe3b6cd9e377767298605d08a05da Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 21 Aug 2018 21:59:37 -0700 Subject: [PATCH 1032/1291] reiserfs: fix broken xattr handling (heap corruption, bad retval) commit a13f085d111e90469faf2d9965eb39b11c114d7e upstream. This fixes the following issues: - When a buffer size is supplied to reiserfs_listxattr() such that each individual name fits, but the concatenation of all names doesn't fit, reiserfs_listxattr() overflows the supplied buffer. This leads to a kernel heap overflow (verified using KASAN) followed by an out-of-bounds usercopy and is therefore a security bug. - When a buffer size is supplied to reiserfs_listxattr() such that a name doesn't fit, -ERANGE should be returned. But reiserfs instead just truncates the list of names; I have verified that if the only xattr on a file has a longer name than the supplied buffer length, listxattr() incorrectly returns zero. With my patch applied, -ERANGE is returned in both cases and the memory corruption doesn't happen anymore. Credit for making me clean this code up a bit goes to Al Viro, who pointed out that the ->actor calling convention is suboptimal and should be changed. Link: http://lkml.kernel.org/r/20180802151539.5373-1-jannh@google.com Fixes: 48b32a3553a5 ("reiserfs: use generic xattr handlers") Signed-off-by: Jann Horn Acked-by: Jeff Mahoney Cc: Eric Biggers Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46492fb37a4c..505f87a8c724 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -792,8 +792,10 @@ static int listxattr_filler(struct dir_context *ctx, const char *name, return 0; size = namelen + 1; if (b->buf) { - if (size > b->size) + if (b->pos + size > b->size) { + b->pos = -ERANGE; return -ERANGE; + } memcpy(b->buf + b->pos, name, namelen); b->buf[b->pos + namelen] = 0; } -- GitLab From f4c88459f7c9320f587b839c3d24a2a9dc18a8a0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Aug 2018 13:09:23 +0200 Subject: [PATCH 1033/1291] Linux 4.14.67 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a495efcf3e9b..4dad2d1c24ba 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 66 +SUBLEVEL = 67 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 7ff4bf211f3e6b45ea4b0a61ecadda8d6ecb43c6 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Mon, 16 Oct 2017 20:54:19 -0200 Subject: [PATCH 1034/1291] crypto: vmx - Use skcipher for ctr fallback commit e666d4e9ceec94c0a88c94b7db31d56474da43b3 upstream. Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/vmx/aes_ctr.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 02ba5f2aa0e6..cd777c75291d 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -27,21 +27,23 @@ #include #include #include +#include + #include "aesp8-ppc.h" struct p8_aes_ctr_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; }; static int p8_aes_ctr_init(struct crypto_tfm *tfm) { const char *alg = crypto_tfm_alg_name(tfm); - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -49,9 +51,9 @@ static int p8_aes_ctr_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -62,7 +64,7 @@ static void p8_aes_ctr_exit(struct crypto_tfm *tfm) struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -81,7 +83,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -115,15 +117,14 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_encrypt(req); + skcipher_request_zero(req); } else { blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); -- GitLab From d2adc199957f29eb92df308812396098046089dd Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 7 Jun 2018 10:11:02 +0300 Subject: [PATCH 1035/1291] vti6: fix PMTU caching and reporting on xmit [ Upstream commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ] When setting the skb->dst before doing the MTU check, the route PMTU caching and reporting is done on the new dst which is about to be released. Instead, PMTU handling should be done using the original dst. This is aligned with IPv4 VTI. Fixes: ccd740cbc6 ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0e0ab90a4334..b9e638cc955f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -480,10 +480,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } - skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); - skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; - mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); @@ -498,9 +494,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) htonl(mtu)); } - return -EMSGSIZE; + err = -EMSGSIZE; + goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); -- GitLab From d35cc7ed2cfe509520ed4ed93f91121c41f478a6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 21 Jun 2018 09:30:47 +0300 Subject: [PATCH 1036/1291] xfrm: fix missing dst_release() after policy blocking lbcast and multicast [ Upstream commit 8cc88773855f988d6a3bbf102bbd9dd9c828eb81 ] Fix missing dst_release() when local broadcast or multicast traffic is xfrm policy blocked. For IPv4 this results to dst leak: ip_route_output_flow() allocates dst_entry via __ip_route_output_key() and passes it to xfrm_lookup_route(). xfrm_lookup returns ERR_PTR(-EPERM) that is propagated. The dst that was allocated is never released. IPv4 local broadcast testcase: ping -b 192.168.1.255 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 192.168.1.255/32 dir out action block IPv4 multicast testcase: ping 224.0.0.1 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 224.0.0.1/32 dir out action block For IPv6 the missing dst_release() causes trouble e.g. when used in netns: ip netns add TEST ip netns exec TEST ip link set lo up ip link add dummy0 type dummy ip link set dev dummy0 netns TEST ip netns exec TEST ip addr add fd00::1111 dev dummy0 ip netns exec TEST ip link set dummy0 up ip netns exec TEST ping -6 -c 5 ff02::1%dummy0 & sleep 1 ip netns exec TEST ip xfrm policy add src ::/0 dst ff02::1 dir out action block wait ip netns del TEST After netns deletion we see: [ 258.239097] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 268.279061] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 278.367018] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 288.375259] unregister_netdevice: waiting for lo to become free. Usage count = 2 Fixes: ac37e2515c1a ("xfrm: release dst_orig in case of error in xfrm_lookup()") Signed-off-by: Tommi Rantala Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9c57d6a5816c..a6c0027cadb5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2285,6 +2285,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); + if (IS_ERR(dst)) + dst_release(dst_orig); + return dst; } EXPORT_SYMBOL(xfrm_lookup_route); -- GitLab From 0118f86d21f1fdc83e5b560d761b5db0fded25ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Jun 2018 14:00:07 +0200 Subject: [PATCH 1037/1291] xfrm: free skb if nlsk pointer is NULL [ Upstream commit 86126b77dcd551ce223e7293bb55854e3df05646 ] nlmsg_multicast() always frees the skb, so in case we cannot call it we must do that ourselves. Fixes: 21ee543edc0dea ("xfrm: fix race between netns cleanup and state expire notification") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dde40f995ac0..5554d28a32eb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1021,10 +1021,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - if (nlsk) - return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); - else - return -1; + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } static inline size_t xfrm_spdinfo_msgsize(void) -- GitLab From b6f147a2d90765e68effe04c42f79875b33cd2af Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 27 Jun 2018 11:49:28 +0800 Subject: [PATCH 1038/1291] esp6: fix memleak on error path in esp6_input [ Upstream commit 7284fdf39a912322ce97de2d30def3c6068a418c ] This ought to be an omission in e6194923237 ("esp: Fix memleaks on error paths."). The memleak on error path in esp6_input is similar to esp_input of esp4. Fixes: e6194923237 ("esp: Fix memleaks on error paths.") Fixes: 3f29770723f ("ipsec: check return value of skb_to_sgvec always") Signed-off-by: Zhen Lei Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/esp6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 89910e2c10f4..f112fef79216 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -651,8 +651,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); ret = skb_to_sgvec(skb, sg, 0, skb->len); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + kfree(tmp); goto out; + } skb->ip_summed = CHECKSUM_NONE; -- GitLab From de044d4ecc65c00d683f62923bddc2a55e2b9d1a Mon Sep 17 00:00:00 2001 From: "mpubbise@codeaurora.org" Date: Mon, 2 Jul 2018 15:40:14 +0530 Subject: [PATCH 1039/1291] mac80211: add stations tied to AP_VLANs during hw reconfig [ Upstream commit 19103a4bfb42f320395daa5616ece3e89e759d63 ] As part of hw reconfig, only stations linked to AP interfaces are added back to the driver ignoring those which are tied to AP_VLAN interfaces. It is true that there could be stations tied to the AP_VLAN interface while serving 4addr clients or when using AP_VLAN for VLAN operations; we should be adding these stations back to the driver as part of hw reconfig, failing to do so can cause functional issues. In the case of ath10k driver, the following errors were observed. ath10k_pci : failed to install key for non-existent peer XX:XX:XX:XX:XX:XX Workqueue: events_freezable ieee80211_restart_work [mac80211] (unwind_backtrace) from (show_stack+0x10/0x14) (show_stack) (dump_stack+0x80/0xa0) (dump_stack) (warn_slowpath_common+0x68/0x8c) (warn_slowpath_common) (warn_slowpath_null+0x18/0x20) (warn_slowpath_null) (ieee80211_enable_keys+0x88/0x154 [mac80211]) (ieee80211_enable_keys) (ieee80211_reconfig+0xc90/0x19c8 [mac80211]) (ieee80211_reconfig]) (ieee80211_restart_work+0x8c/0xa0 [mac80211]) (ieee80211_restart_work) (process_one_work+0x284/0x488) (process_one_work) (worker_thread+0x228/0x360) (worker_thread) (kthread+0xd8/0xec) (kthread) (ret_from_fork+0x14/0x24) Also while bringing down the AP VAP, WARN_ONs and errors related to peer removal were observed. ath10k_pci : failed to clear all peer wep keys for vdev 0: -2 ath10k_pci : failed to disassociate station: 8c:fd:f0:0a:8c:f5 vdev 0: -2 (unwind_backtrace) (show_stack+0x10/0x14) (show_stack) (dump_stack+0x80/0xa0) (dump_stack) (warn_slowpath_common+0x68/0x8c) (warn_slowpath_common) (warn_slowpath_null+0x18/0x20) (warn_slowpath_null) (sta_set_sinfo+0xb98/0xc9c [mac80211]) (sta_set_sinfo [mac80211]) (__sta_info_flush+0xf0/0x134 [mac80211]) (__sta_info_flush [mac80211]) (ieee80211_stop_ap+0xe8/0x390 [mac80211]) (ieee80211_stop_ap [mac80211]) (__cfg80211_stop_ap+0xe0/0x3dc [cfg80211]) (__cfg80211_stop_ap [cfg80211]) (cfg80211_stop_ap+0x30/0x44 [cfg80211]) (cfg80211_stop_ap [cfg80211]) (genl_rcv_msg+0x274/0x30c) (genl_rcv_msg) (netlink_rcv_skb+0x58/0xac) (netlink_rcv_skb) (genl_rcv+0x20/0x34) (genl_rcv) (netlink_unicast+0x11c/0x204) (netlink_unicast) (netlink_sendmsg+0x30c/0x370) (netlink_sendmsg) (sock_sendmsg+0x70/0x84) (sock_sendmsg) (___sys_sendmsg.part.3+0x188/0x228) (___sys_sendmsg.part.3) (__sys_sendmsg+0x4c/0x70) (__sys_sendmsg) (ret_fast_syscall+0x0/0x44) These issues got fixed by adding the stations which are tied to AP_VLANs back to the driver. Signed-off-by: Manikanta Pubbisetty Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6aef6793d052..81f120466c38 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2068,7 +2068,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!sta->uploaded) continue; - if (sta->sdata->vif.type != NL80211_IFTYPE_AP) + if (sta->sdata->vif.type != NL80211_IFTYPE_AP && + sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN) continue; for (state = IEEE80211_STA_NOTEXIST; -- GitLab From 147b89c421d0c57781ec57888f379e1ca7026cbb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 8 Jul 2018 19:36:02 -0400 Subject: [PATCH 1040/1291] ext4: clear mmp sequence number when remounting read-only [ Upstream commit 2dca60d98e241bea686004168f85208f215fc697 ] Previously, when an MMP-protected file system is remounted read-only, the kmmpd thread would exit the next time it woke up (a few seconds later), without resetting the MMP sequence number back to EXT4_MMP_SEQ_CLEAN. Fix this by explicitly killing the MMP thread when the file system is remounted read-only. Signed-off-by: Theodore Ts'o Cc: Andreas Dilger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mmp.c | 7 ++----- fs/ext4/super.c | 2 ++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..638ad4743477 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -186,11 +186,8 @@ static int kmmpd(void *data) goto exit_thread; } - if (sb_rdonly(sb)) { - ext4_warning(sb, "kmmpd being stopped since filesystem " - "has been remounted as readonly."); - goto exit_thread; - } + if (sb_rdonly(sb)) + break; diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f30d2bf40471..b4fb085261fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5163,6 +5163,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || -- GitLab From 7cb625dd9ed6f3386dd017d088f4ebe335b5638a Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sun, 8 Jul 2018 09:57:22 +0000 Subject: [PATCH 1041/1291] nl80211: Add a missing break in parse_station_flags [ Upstream commit 5cf3006cc81d9aa09a10aa781fc065546b12919d ] I was looking at usually suppressed gcc warnings, [-Wimplicit-fallthrough=] in this case: The code definitely looks like a break is missing here. However I am not able to test the NL80211_IFTYPE_MESH_POINT, nor do I actually know what might be :) So please use this patch with caution and only if you are able to do some testing. Signed-off-by: Bernd Edlinger [johannes: looks obvious enough to apply as is, interesting though that it never seems to have been a problem] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4cd351b74e48..753f3e73c498 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4186,6 +4186,7 @@ static int parse_station_flags(struct genl_info *info, params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHORIZED); + break; default: return -EINVAL; } -- GitLab From 384f0d9fe4ad699251ff2f2686533a9b9356132d Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 3 Jul 2018 12:56:03 -0400 Subject: [PATCH 1042/1291] drm/bridge: adv7511: Reset registers on hotplug [ Upstream commit 5f3417569165a8ee57654217f73e0160312f409c ] The bridge loses its hw state when the cable is unplugged. If we detect this case in the hpd handler, reset its state. Reported-by: Rob Clark Tested-by: Rob Clark Reviewed-by: Archit Taneja Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180703165648.120401-1-seanpaul@chromium.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index b2431aee7887..f5091827628a 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -424,6 +424,18 @@ static void adv7511_hpd_work(struct work_struct *work) else status = connector_status_disconnected; + /* + * The bridge resets its registers on unplug. So when we get a plug + * event and we're already supposed to be powered, cycle the bridge to + * restore its state. + */ + if (status == connector_status_connected && + adv7511->connector.status == connector_status_disconnected && + adv7511->powered) { + regcache_mark_dirty(adv7511->regmap); + adv7511_power_on(adv7511); + } + if (adv7511->connector.status != status) { adv7511->connector.status = status; drm_kms_helper_hotplug_event(adv7511->connector.dev); -- GitLab From ca5fc53ad40104b110c659cbf20c9aa23881264f Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Jul 2018 22:03:43 +0530 Subject: [PATCH 1043/1291] scsi: target: iscsi: cxgbit: fix max iso npdu calculation [ Upstream commit 1b350ea0c2f4df9aa30426614c8eb755a8c32814 ] - rounddown CXGBIT_MAX_ISO_PAYLOAD by csk->emss before calculating max_iso_npdu to get max TCP payload in multiple of mss. - call cxgbit_set_digest() before cxgbit_set_iso_npdu() to set csk->submode, it is used in calculating number of iso pdus. Signed-off-by: Varun Prakash Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/cxgbit/cxgbit_target.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 514986b57c2d..25eb3891e34b 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -652,6 +652,7 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) struct iscsi_param *param; u32 mrdsl, mbl; u32 max_npdu, max_iso_npdu; + u32 max_iso_payload; if (conn->login->leading_connection) { param = iscsi_find_param_from_key(MAXBURSTLENGTH, @@ -670,8 +671,10 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) mrdsl = conn_ops->MaxRecvDataSegmentLength; max_npdu = mbl / mrdsl; - max_iso_npdu = CXGBIT_MAX_ISO_PAYLOAD / - (ISCSI_HDR_LEN + mrdsl + + max_iso_payload = rounddown(CXGBIT_MAX_ISO_PAYLOAD, csk->emss); + + max_iso_npdu = max_iso_payload / + (ISCSI_HDR_LEN + mrdsl + cxgbit_digest_len[csk->submode]); csk->max_iso_npdu = min(max_npdu, max_iso_npdu); @@ -741,6 +744,9 @@ static int cxgbit_set_params(struct iscsi_conn *conn) if (conn_ops->MaxRecvDataSegmentLength > cdev->mdsl) conn_ops->MaxRecvDataSegmentLength = cdev->mdsl; + if (cxgbit_set_digest(csk)) + return -1; + if (conn->login->leading_connection) { param = iscsi_find_param_from_key(ERRORRECOVERYLEVEL, conn->param_list); @@ -764,7 +770,7 @@ static int cxgbit_set_params(struct iscsi_conn *conn) if (is_t5(cdev->lldi.adapter_type)) goto enable_ddp; else - goto enable_digest; + return 0; } if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { @@ -781,10 +787,6 @@ static int cxgbit_set_params(struct iscsi_conn *conn) } } -enable_digest: - if (cxgbit_set_digest(csk)) - return -1; - return 0; } -- GitLab From 9ac1a4644bb8ff4343f9424ab6b3ab6db0f86c4a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Jul 2018 22:09:52 +0530 Subject: [PATCH 1044/1291] scsi: libiscsi: fix possible NULL pointer dereference in case of TMF [ Upstream commit a17037e7d59075053b522048742a08ac9500bde8 ] In iscsi_check_tmf_restrictions() task->hdr is dereferenced to print the opcode, it is possible that task->hdr is NULL. There are two cases based on opcode argument: 1. ISCSI_OP_SCSI_CMD - In this case alloc_pdu() is called after iscsi_check_tmf_restrictions() iscsi_prep_scsi_cmd_pdu() -> iscsi_check_tmf_restrictions() -> alloc_pdu(). Transport drivers allocate memory for iSCSI hdr in alloc_pdu() and assign it to task->hdr. In case of TMF task->hdr will be NULL resulting in NULL pointer dereference. 2. ISCSI_OP_SCSI_DATA_OUT - In this case transport driver can free the memory for iSCSI hdr after transmitting the pdu so task->hdr can be NULL or invalid. This patch fixes this issue by removing task->hdr->opcode from the printk statement. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index bddbe2da5283..cf8a15e54d83 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -284,11 +284,11 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (opcode != ISCSI_OP_SCSI_DATA_OUT) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] " "rejected.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } /* @@ -297,10 +297,10 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (conn->session->fast_abort) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] fast abort.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } break; -- GitLab From a43eac2d855be9f441b0c6e277bdaf878b07a370 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 Apr 2018 17:31:35 +0200 Subject: [PATCH 1045/1291] drm/imx: imx-ldb: disable LDB on driver bind [ Upstream commit b58262396fabd43dc869b576e3defdd23b32fe94 ] The LVDS signal integrity is only guaranteed when the correct enable sequence (first IPU DI, then LDB) is used. If the LDB display output was active before the imx-drm driver is loaded (like when a bootsplash was active) the DI will be disabled by the full IPU reset we do when loading the driver. The LDB control registers are not part of the IPU range and thus will remain unchanged. This leads to the LDB still being active when the DI is getting enabled, effectively reversing the required enable sequence. Fix this by also disabling the LDB on driver bind. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-ldb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 56dd7a9a8e25..17974c0b4be8 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -612,6 +612,9 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(imx_ldb->regmap); } + /* disable LDB by resetting the control register to POR default */ + regmap_write(imx_ldb->regmap, IOMUXC_GPR2, 0); + imx_ldb->dev = dev; if (of_id) -- GitLab From 962ff36dac014618c60acae78db9a872abde7480 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 Apr 2018 17:31:36 +0200 Subject: [PATCH 1046/1291] drm/imx: imx-ldb: check if channel is enabled before printing warning [ Upstream commit c80d673b91a6c81d765864e10f2b15110ee900ad ] If the second LVDS channel has been disabled in the DT when using dual-channel mode we should not print a warning. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-ldb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 17974c0b4be8..dd5312b02a8d 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -655,14 +655,14 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (ret || i < 0 || i > 1) return -EINVAL; + if (!of_device_is_available(child)) + continue; + if (dual && i > 0) { dev_warn(dev, "dual-channel mode, ignoring second output\n"); continue; } - if (!of_device_is_available(child)) - continue; - channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; -- GitLab From ced413c5ef85cf2da775043a9480a86139eb6de7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Jul 2018 12:11:34 -0400 Subject: [PATCH 1047/1291] nbd: don't requeue the same request twice. [ Upstream commit d7d94d48a272fd7583dc3c83acb8f5ed4ef456a4 ] We can race with the snd timeout and the per-request timeout and end up requeuing the same request twice. We can't use the send_complete completion to tell if everything is ok because we hold the tx_lock during send, so the timeout stuff will block waiting to mark the socket dead, and we could be marked complete and still requeue. Instead add a flag to the socket so we know whether we've been requeued yet. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5feba04ab940..7012f6d71742 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -112,12 +112,15 @@ struct nbd_device { struct task_struct *task_setup; }; +#define NBD_CMD_REQUEUED 1 + struct nbd_cmd { struct nbd_device *nbd; int index; int cookie; struct completion send_complete; blk_status_t status; + unsigned long flags; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -146,6 +149,14 @@ static inline struct device *nbd_to_dev(struct nbd_device *nbd) return disk_to_dev(nbd->disk); } +static void nbd_requeue_cmd(struct nbd_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) + blk_mq_requeue_request(req, true); +} + static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -328,7 +339,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); nbd_config_put(nbd); return BLK_EH_NOT_HANDLED; } @@ -484,6 +495,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) nsock->pending = req; nsock->sent = sent; } + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -525,6 +537,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) */ nsock->pending = req; nsock->sent = sent; + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), @@ -793,7 +806,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) */ blk_mq_start_request(req); if (unlikely(nsock->pending && nsock->pending != req)) { - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; goto out; } @@ -806,7 +819,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; } out: @@ -831,6 +844,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * done sending everything over the wire. */ init_completion(&cmd->send_complete); + clear_bit(NBD_CMD_REQUEUED, &cmd->flags); /* We can be called directly from the user space process, which means we * could possibly have signals pending so our sendmsg will fail. In @@ -1446,6 +1460,7 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->nbd = set->driver_data; + cmd->flags = 0; return 0; } -- GitLab From 05ee6166d702e8bea50322b31886a91fc8301672 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Jul 2018 12:11:35 -0400 Subject: [PATCH 1048/1291] nbd: handle unexpected replies better [ Upstream commit 8f3ea35929a0806ad1397db99a89ffee0140822a ] If the server or network is misbehaving and we get an unexpected reply we can sometimes miss the request not being started and wait on a request and never get a response, or even double complete the same request. Fix this by replacing the send_complete completion with just a per command lock. Add a per command cookie as well so that we can know if we're getting a double completion for a previous event. Also check to make sure we dont have REQUEUED set as that means we raced with the timeout handler and need to just let the retry occur. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 75 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7012f6d71742..5e55d03d3d01 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -116,11 +116,12 @@ struct nbd_device { struct nbd_cmd { struct nbd_device *nbd; + struct mutex lock; int index; int cookie; - struct completion send_complete; blk_status_t status; unsigned long flags; + u32 cmd_cookie; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -157,6 +158,27 @@ static void nbd_requeue_cmd(struct nbd_cmd *cmd) blk_mq_requeue_request(req, true); } +#define NBD_COOKIE_BITS 32 + +static u64 nbd_cmd_handle(struct nbd_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + u32 tag = blk_mq_unique_tag(req); + u64 cookie = cmd->cmd_cookie; + + return (cookie << NBD_COOKIE_BITS) | tag; +} + +static u32 nbd_handle_to_tag(u64 handle) +{ + return (u32)handle; +} + +static u32 nbd_handle_to_cookie(u64 handle) +{ + return (u32)(handle >> NBD_COOKIE_BITS); +} + static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -317,6 +339,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } config = nbd->config; + if (!mutex_trylock(&cmd->lock)) + return BLK_EH_RESET_TIMER; + if (config->num_connections > 1) { dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out, retrying\n"); @@ -339,6 +364,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } + mutex_unlock(&cmd->lock); nbd_requeue_cmd(cmd); nbd_config_put(nbd); return BLK_EH_NOT_HANDLED; @@ -349,6 +375,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } set_bit(NBD_TIMEDOUT, &config->runtime_flags); cmd->status = BLK_STS_IOERR; + mutex_unlock(&cmd->lock); sock_shutdown(nbd); nbd_config_put(nbd); @@ -425,9 +452,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; + u64 handle; u32 type; u32 nbd_cmd_flags = 0; - u32 tag = blk_mq_unique_tag(req); int sent = nsock->sent, skip = 0; iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); @@ -469,6 +496,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) goto send_pages; } iov_iter_advance(&from, sent); + } else { + cmd->cmd_cookie++; } cmd->index = index; cmd->cookie = nsock->cookie; @@ -477,7 +506,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } - memcpy(request.handle, &tag, sizeof(tag)); + handle = nbd_cmd_handle(cmd); + memcpy(request.handle, &handle, sizeof(handle)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), @@ -570,10 +600,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct nbd_reply reply; struct nbd_cmd *cmd; struct request *req = NULL; + u64 handle; u16 hwq; u32 tag; struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; struct iov_iter to; + int ret = 0; reply.magic = 0; iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); @@ -591,8 +623,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return ERR_PTR(-EPROTO); } - memcpy(&tag, reply.handle, sizeof(u32)); - + memcpy(&handle, reply.handle, sizeof(handle)); + tag = nbd_handle_to_tag(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], @@ -603,11 +635,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return ERR_PTR(-ENOENT); } cmd = blk_mq_rq_to_pdu(req); + + mutex_lock(&cmd->lock); + if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { + dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", + req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); + ret = -ENOENT; + goto out; + } + if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { + dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", + req); + ret = -ENOENT; + goto out; + } if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); cmd->status = BLK_STS_IOERR; - return cmd; + goto out; } dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd); @@ -632,18 +678,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (nbd_disconnected(config) || config->num_connections <= 1) { cmd->status = BLK_STS_IOERR; - return cmd; + goto out; } - return ERR_PTR(-EIO); + ret = -EIO; + goto out; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", cmd, bvec.bv_len); } - } else { - /* See the comment in nbd_queue_rq. */ - wait_for_completion(&cmd->send_complete); } - return cmd; +out: + mutex_unlock(&cmd->lock); + return ret ? ERR_PTR(ret) : cmd; } static void recv_work(struct work_struct *work) @@ -843,7 +889,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * that the server is misbehaving (or there was an error) before we're * done sending everything over the wire. */ - init_completion(&cmd->send_complete); + mutex_lock(&cmd->lock); clear_bit(NBD_CMD_REQUEUED, &cmd->flags); /* We can be called directly from the user space process, which means we @@ -856,7 +902,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, ret = BLK_STS_IOERR; else if (!ret) ret = BLK_STS_OK; - complete(&cmd->send_complete); + mutex_unlock(&cmd->lock); return ret; } @@ -1461,6 +1507,7 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->nbd = set->driver_data; cmd->flags = 0; + mutex_init(&cmd->lock); return 0; } -- GitLab From 3f41c2d0e61859cef7a9bd91d3f2ca81c7955fda Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 20 Jun 2018 11:54:53 +0800 Subject: [PATCH 1049/1291] usb: gadget: r8a66597: Fix two possible sleep-in-atomic-context bugs in init_controller() [ Upstream commit 0602088b10a7c0b4e044a810678ef93d7cc5bf48 ] The driver may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16.7 are: [FUNC] msleep drivers/usb/gadget/udc/r8a66597-udc.c, 839: msleep in init_controller drivers/usb/gadget/udc/r8a66597-udc.c, 96: init_controller in r8a66597_usb_disconnect drivers/usb/gadget/udc/r8a66597-udc.c, 93: spin_lock in r8a66597_usb_disconnect [FUNC] msleep drivers/usb/gadget/udc/r8a66597-udc.c, 835: msleep in init_controller drivers/usb/gadget/udc/r8a66597-udc.c, 96: init_controller in r8a66597_usb_disconnect drivers/usb/gadget/udc/r8a66597-udc.c, 93: spin_lock in r8a66597_usb_disconnect To fix these bugs, msleep() is replaced with mdelay(). This bug is found by my static analysis tool (DSAC-2) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/r8a66597-udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index 118ad70f1af0..f694573d81b3 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -835,11 +835,11 @@ static void init_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, XCKE, SYSCFG0); - msleep(3); + mdelay(3); r8a66597_bset(r8a66597, PLLC, SYSCFG0); - msleep(1); + mdelay(1); r8a66597_bset(r8a66597, SCKE, SYSCFG0); -- GitLab From 43b058dc21cdb8908d01e526a5bb1875ec00bd81 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 20 Jun 2018 11:55:08 +0800 Subject: [PATCH 1050/1291] usb: gadget: r8a66597: Fix a possible sleep-in-atomic-context bugs in r8a66597_queue() [ Upstream commit f36b507c14c4b6e634463a610294e9cb0065c8ea ] The driver may sleep in an interrupt handler. The function call path (from bottom to top) in Linux-4.16.7 is: [FUNC] r8a66597_queue(GFP_KERNEL) drivers/usb/gadget/udc/r8a66597-udc.c, 1193: r8a66597_queue in get_status drivers/usb/gadget/udc/r8a66597-udc.c, 1301: get_status in setup_packet drivers/usb/gadget/udc/r8a66597-udc.c, 1381: setup_packet in irq_control_stage drivers/usb/gadget/udc/r8a66597-udc.c, 1508: irq_control_stage in r8a66597_irq (interrupt handler) To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool (DSAC-2) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/r8a66597-udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index f694573d81b3..84b227ede082 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -1193,7 +1193,7 @@ __acquires(r8a66597->lock) r8a66597->ep0_req->length = 2; /* AV: what happens if we get called again before that gets through? */ spin_unlock(&r8a66597->lock); - r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_KERNEL); + r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_ATOMIC); spin_lock(&r8a66597->lock); } -- GitLab From fa18ff7edb88d9e84390305759fdd4d2a0f81d6d Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Thu, 21 Jun 2018 17:22:46 +0200 Subject: [PATCH 1051/1291] usb: gadget: f_uac2: fix error handling in afunc_bind (again) [ Upstream commit e87581fe0509020f77ebf0b7c4c1c338c6a4bcf6 ] If usb_ep_autoconfig() fails (i.e. returns a null endpoint descriptor), we expect afunc_bind() to fail (i.e. return a negative error code). However, due to v4.10-rc1 commit f1d3861d63a5 ("usb: gadget: f_uac2: fix error handling at afunc_bind"), afunc_bind() returns zero, telling the caller that it succeeded. This then generates NULL pointer dereference in below scenario on Rcar H3-ES20-Salvator-X target: rcar-gen3:/home/root# modprobe g_audio [ 626.521155] g_audio gadget: afunc_bind:565 Error! [ 626.526319] g_audio gadget: Linux USB Audio Gadget, version: Feb 2, 2012 [ 626.533405] g_audio gadget: g_audio ready rcar-gen3:/home/root# rcar-gen3:/home/root# modprobe -r g_audio [ 728.256707] ================================================================== [ 728.264293] BUG: KASAN: null-ptr-deref in u_audio_stop_capture+0x70/0x268 [u_audio] [ 728.272244] Read of size 8 at addr 00000000000000a0 by task modprobe/2545 [ 728.279309] [ 728.280849] CPU: 0 PID: 2545 Comm: modprobe Tainted: G WC 4.14.47+ #152 [ 728.288778] Hardware name: Renesas Salvator-X board based on r8a7795 ES2.0+ (DT) [ 728.296454] Call trace: [ 728.299151] [] dump_backtrace+0x0/0x364 [ 728.304808] [] show_stack+0x14/0x1c [ 728.310081] [] dump_stack+0x108/0x174 [ 728.315522] [] kasan_report+0x1fc/0x354 [ 728.321134] [] __asan_load8+0x24/0x94 [ 728.326600] [] u_audio_stop_capture+0x70/0x268 [u_audio] [ 728.333735] [] afunc_disable+0x44/0x60 [usb_f_uac2] [ 728.340503] [] usb_remove_function+0x9c/0x210 [libcomposite] [ 728.348060] [] remove_config.isra.2+0x1d8/0x218 [libcomposite] [ 728.355788] [] __composite_unbind+0x104/0x1f8 [libcomposite] [ 728.363339] [] composite_unbind+0x10/0x18 [libcomposite] [ 728.370536] [] usb_gadget_remove_driver+0xc0/0x170 [udc_core] [ 728.378172] [] usb_gadget_unregister_driver+0x1cc/0x258 [udc_core] [ 728.386274] [] usb_composite_unregister+0x10/0x18 [libcomposite] [ 728.394116] [] audio_driver_exit+0x14/0x28 [g_audio] [ 728.400878] [] SyS_delete_module+0x288/0x32c [ 728.406935] Exception stack(0xffff8006cf6c7ec0 to 0xffff8006cf6c8000) [ 728.413624] 7ec0: 0000000006136428 0000000000000800 0000000000000000 0000ffffd706efe8 [ 728.421718] 7ee0: 0000ffffd706efe9 000000000000000a 1999999999999999 0000000000000000 [ 728.429792] 7f00: 000000000000006a 000000000042c078 0000000000000000 0000000000000005 [ 728.437870] 7f20: 0000000000000000 0000000000000000 0000000000000004 0000000000000000 [ 728.445952] 7f40: 000000000042bfc8 0000ffffbc7c8f40 0000000000000000 00000000061363c0 [ 728.454035] 7f60: 0000000006136428 0000000000000000 0000000000000000 0000000006136428 [ 728.462114] 7f80: 000000000042c000 0000ffffd7071448 000000000042c000 0000000000000000 [ 728.470190] 7fa0: 00000000061350c0 0000ffffd7070010 000000000041129c 0000ffffd7070010 [ 728.478281] 7fc0: 0000ffffbc7c8f48 0000000060000000 0000000006136428 000000000000006a [ 728.486351] 7fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 728.494434] [] el0_svc_naked+0x34/0x38 [ 728.499957] ================================================================== [ 728.507801] Unable to handle kernel NULL pointer dereference at virtual address 000000a0 [ 728.517742] Mem abort info: [ 728.520993] Exception class = DABT (current EL), IL = 32 bits [ 728.527375] SET = 0, FnV = 0 [ 728.530731] EA = 0, S1PTW = 0 [ 728.534361] Data abort info: [ 728.537650] ISV = 0, ISS = 0x00000006 [ 728.541863] CM = 0, WnR = 0 [ 728.545167] user pgtable: 4k pages, 48-bit VAs, pgd = ffff8006c6100000 [ 728.552156] [00000000000000a0] *pgd=0000000716a8d003 [ 728.557519] , *pud=00000007116fc003 [ 728.561259] , *pmd=0000000000000000 [ 728.564985] Internal error: Oops: 96000006 [#1] PREEMPT SMP [ 728.570815] Modules linked in: [ 728.574023] usb_f_uac2 [ 728.576560] u_audio [ 728.578827] g_audio(-) [ 728.581361] libcomposite [ 728.584071] configfs [ 728.586428] aes_ce_blk [ 728.588960] sata_rcar [ 728.591421] crypto_simd [ 728.594039] cryptd [ 728.596217] libata [ 728.598396] aes_ce_cipher [ 728.601188] crc32_ce [ 728.603542] ghash_ce [ 728.605896] gf128mul [ 728.608250] aes_arm64 [ 728.610692] scsi_mod [ 728.613046] sha2_ce [ 728.615313] xhci_plat_hcd [ 728.618106] sha256_arm64 [ 728.620811] sha1_ce [ 728.623077] renesas_usbhs [ 728.625869] xhci_hcd [ 728.628243] renesas_usb3 [ 728.630948] sha1_generic [ 728.633670] ravb_streaming(C) [ 728.636814] udc_core [ 728.639168] cpufreq_dt [ 728.641697] rcar_gen3_thermal [ 728.644840] usb_dmac [ 728.647194] pwm_rcar [ 728.649548] thermal_sys [ 728.652165] virt_dma [ 728.654519] mch_core(C) [ 728.657137] pwm_bl [ 728.659315] snd_soc_rcar [ 728.662020] snd_aloop [ 728.664462] snd_soc_generic_card [ 728.667869] snd_soc_ak4613 [ 728.670749] ipv6 [ 728.672768] autofs4 [ 728.675052] CPU: 0 PID: 2545 Comm: modprobe Tainted: G B WC 4.14.47+ #152 [ 728.682973] Hardware name: Renesas Salvator-X board based on r8a7795 ES2.0+ (DT) [ 728.690637] task: ffff8006ced38000 task.stack: ffff8006cf6c0000 [ 728.696814] PC is at u_audio_stop_capture+0x70/0x268 [u_audio] [ 728.702896] LR is at u_audio_stop_capture+0x70/0x268 [u_audio] [ 728.708964] pc : [] lr : [] pstate: 60000145 [ 728.716620] sp : ffff8006cf6c7a50 [ 728.720154] x29: ffff8006cf6c7a50 [ 728.723760] x28: ffff8006ced38000 [ 728.727272] x27: ffff200008fd7000 [ 728.730857] x26: ffff2000021d2340 [ 728.734361] x25: 0000000000000000 [ 728.737948] x24: ffff200009e94b08 [ 728.741452] x23: 00000000000000a0 [ 728.745052] x22: 00000000000000a8 [ 728.748558] x21: 1ffff000d9ed8f7c [ 728.752142] x20: ffff8006d671a800 [ 728.755646] x19: 0000000000000000 [ 728.759231] x18: 0000000000000000 [ 728.762736] x17: 0000ffffbc7c8f40 [ 728.766320] x16: ffff200008213c4c [ 728.769823] x15: 0000000000000000 [ 728.773408] x14: 0720072007200720 [ 728.776912] x13: 0720072007200720 [ 728.780497] x12: ffffffffffffffff [ 728.784001] x11: 0000000000000040 [ 728.787598] x10: 0000000000001600 [ 728.791103] x9 : ffff8006cf6c77a0 [ 728.794689] x8 : ffff8006ced39660 [ 728.798193] x7 : ffff20000811c738 [ 728.801794] x6 : 0000000000000000 [ 728.805299] x5 : dfff200000000000 [ 728.808885] x4 : ffff8006ced38000 [ 728.812390] x3 : ffff200008fb46e8 [ 728.815976] x2 : 0000000000000007 [ 728.819480] x1 : 3ba68643e7431500 [ 728.823066] x0 : 0000000000000000 [ 728.826574] Process modprobe (pid: 2545, stack limit = 0xffff8006cf6c0000) [ 728.833704] Call trace: [ 728.836292] Exception stack(0xffff8006cf6c7910 to 0xffff8006cf6c7a50) [ 728.842987] 7900: 0000000000000000 3ba68643e7431500 [ 728.851084] 7920: 0000000000000007 ffff200008fb46e8 ffff8006ced38000 dfff200000000000 [ 728.859173] 7940: 0000000000000000 ffff20000811c738 ffff8006ced39660 ffff8006cf6c77a0 [ 728.867248] 7960: 0000000000001600 0000000000000040 ffffffffffffffff 0720072007200720 [ 728.875323] 7980: 0720072007200720 0000000000000000 ffff200008213c4c 0000ffffbc7c8f40 [ 728.883412] 79a0: 0000000000000000 0000000000000000 ffff8006d671a800 1ffff000d9ed8f7c [ 728.891485] 79c0: 00000000000000a8 00000000000000a0 ffff200009e94b08 0000000000000000 [ 728.899561] 79e0: ffff2000021d2340 ffff200008fd7000 ffff8006ced38000 ffff8006cf6c7a50 [ 728.907636] 7a00: ffff2000021e1618 ffff8006cf6c7a50 ffff2000021e1618 0000000060000145 [ 728.915710] 7a20: 0000000000000008 0000000000000000 0000ffffffffffff 3ba68643e7431500 [ 728.923780] 7a40: ffff8006cf6c7a50 ffff2000021e1618 [ 728.928880] [] u_audio_stop_capture+0x70/0x268 [u_audio] [ 728.936032] [] afunc_disable+0x44/0x60 [usb_f_uac2] [ 728.942822] [] usb_remove_function+0x9c/0x210 [libcomposite] [ 728.950385] [] remove_config.isra.2+0x1d8/0x218 [libcomposite] [ 728.958134] [] __composite_unbind+0x104/0x1f8 [libcomposite] [ 728.965689] [] composite_unbind+0x10/0x18 [libcomposite] [ 728.972882] [] usb_gadget_remove_driver+0xc0/0x170 [udc_core] [ 728.980522] [] usb_gadget_unregister_driver+0x1cc/0x258 [udc_core] [ 728.988638] [] usb_composite_unregister+0x10/0x18 [libcomposite] [ 728.996472] [] audio_driver_exit+0x14/0x28 [g_audio] [ 729.003231] [] SyS_delete_module+0x288/0x32c [ 729.009278] Exception stack(0xffff8006cf6c7ec0 to 0xffff8006cf6c8000) [ 729.015946] 7ec0: 0000000006136428 0000000000000800 0000000000000000 0000ffffd706efe8 [ 729.024022] 7ee0: 0000ffffd706efe9 000000000000000a 1999999999999999 0000000000000000 [ 729.032099] 7f00: 000000000000006a 000000000042c078 0000000000000000 0000000000000005 [ 729.040172] 7f20: 0000000000000000 0000000000000000 0000000000000004 0000000000000000 [ 729.048263] 7f40: 000000000042bfc8 0000ffffbc7c8f40 0000000000000000 00000000061363c0 [ 729.056337] 7f60: 0000000006136428 0000000000000000 0000000000000000 0000000006136428 [ 729.064411] 7f80: 000000000042c000 0000ffffd7071448 000000000042c000 0000000000000000 [ 729.072484] 7fa0: 00000000061350c0 0000ffffd7070010 000000000041129c 0000ffffd7070010 [ 729.080563] 7fc0: 0000ffffbc7c8f48 0000000060000000 0000000006136428 000000000000006a [ 729.088636] 7fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 729.096733] [] el0_svc_naked+0x34/0x38 [ 729.102259] Code: 9597d1b3 aa1703e0 9102a276 958792b9 (f9405275) [ 729.108617] ---[ end trace 7560c5fa3d100243 ]--- After this patch is applied, the issue is fixed: rcar-gen3:/home/root# modprobe g_audio [ 59.217127] g_audio gadget: afunc_bind:565 Error! [ 59.222329] g_audio ee020000.usb: failed to start g_audio: -19 modprobe: ERROR: could not insert 'g_audio': No such device rcar-gen3:/home/root# modprobe -r g_audio rcar-gen3:/home/root# Fixes: f1d3861d63a5 ("usb: gadget: f_uac2: fix error handling at afunc_bind") Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 97cb2dfd6369..c6ce178775b6 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -563,13 +563,13 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); if (!agdev->out_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - return ret; + return -ENODEV; } agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc); if (!agdev->in_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - return ret; + return -ENODEV; } agdev->in_ep_maxpsize = max_t(u16, -- GitLab From dc126a1e5fb832f1f5094b4d3faf297d08314dfb Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Thu, 21 Jun 2018 17:22:47 +0200 Subject: [PATCH 1052/1291] usb: gadget: u_audio: fix pcm/card naming in g_audio_setup() [ Upstream commit dfa042fa310caa475667b8c38d852f14439e0b01 ] Fix below smatch (v0.5.0-4443-g69e9094e11c1) warnings: drivers/usb/gadget/function/u_audio.c:607 g_audio_setup() warn: strcpy() 'pcm_name' of unknown size might be too large for 'pcm->name' drivers/usb/gadget/function/u_audio.c:614 g_audio_setup() warn: strcpy() 'card_name' of unknown size might be too large for 'card->driver' drivers/usb/gadget/function/u_audio.c:615 g_audio_setup() warn: strcpy() 'card_name' of unknown size might be too large for 'card->shortname' Below commits performed a similar 's/strcpy/strlcpy/' rework: * v2.6.31 commit 8372d4980fbc ("ALSA: ctxfi - Fix PCM device naming") * v4.14 commit 003d3e70dbeb ("ALSA: ad1848: fix format string overflow warning") * v4.14 commit 6d8b04de87e1 ("ALSA: cs423x: fix format string overflow warning") Fixes: eb9fecb9e69b ("usb: gadget: f_uac2: split out audio core") Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 3971bbab88bd..429d7928b7c9 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -604,15 +604,15 @@ int g_audio_setup(struct g_audio *g_audio, const char *pcm_name, if (err < 0) goto snd_fail; - strcpy(pcm->name, pcm_name); + strlcpy(pcm->name, pcm_name, sizeof(pcm->name)); pcm->private_data = uac; uac->pcm = pcm; snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &uac_pcm_ops); snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &uac_pcm_ops); - strcpy(card->driver, card_name); - strcpy(card->shortname, card_name); + strlcpy(card->driver, card_name, sizeof(card->driver)); + strlcpy(card->shortname, card_name, sizeof(card->shortname)); sprintf(card->longname, "%s %i", card_name, card->dev->id); snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_CONTINUOUS, -- GitLab From 224c0d0894ff7c3e85add1ce02a523d7f49cfdda Mon Sep 17 00:00:00 2001 From: Joshua Frkuska Date: Thu, 21 Jun 2018 17:22:48 +0200 Subject: [PATCH 1053/1291] usb: gadget: u_audio: update hw_ptr in iso_complete after data copied [ Upstream commit 6b37bd78d30c890e575a1bda22978d1d2a233362 ] In u_audio_iso_complete, the runtime hw_ptr is updated before the data is actually copied over to/from the buffer/dma area. When ALSA uses this hw_ptr, the data may not actually be available to be used. This causes trash/stale audio to play/record. This patch updates the hw_ptr after the data has been copied to avoid this. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Joshua Frkuska Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 429d7928b7c9..725593f7da9b 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -152,7 +152,6 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) update_alsa = true; hw_ptr = prm->hw_ptr; - prm->hw_ptr = (prm->hw_ptr + req->actual) % prm->dma_bytes; spin_unlock_irqrestore(&prm->lock, flags); @@ -177,6 +176,11 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) } } + spin_lock_irqsave(&prm->lock, flags); + /* update hw_ptr after data is copied to memory */ + prm->hw_ptr = (hw_ptr + req->actual) % prm->dma_bytes; + spin_unlock_irqrestore(&prm->lock, flags); + exit: if (usb_ep_queue(ep, req, GFP_ATOMIC)) dev_err(uac->card->dev, "%d Error!\n", __LINE__); -- GitLab From 42b09bece176fdbf3b8ebc459b5046901ac49be4 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 21 Jun 2018 17:22:49 +0200 Subject: [PATCH 1054/1291] usb: gadget: u_audio: remove caching of stream buffer parameters [ Upstream commit 96afb54ece0ee903d23a7ac04ddc461413b972c4 ] There is no necessity to copy PCM stream ring buffer area and size properties to UAC private data structure, these values can be got from substream itself. The change gives more control on substream and avoid stale caching. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 30 ++++++++++++--------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 725593f7da9b..717656e17afa 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -41,9 +41,6 @@ struct uac_req { struct uac_rtd_params { struct snd_uac_chip *uac; /* parent chip */ bool ep_enabled; /* if the ep is enabled */ - /* Size of the ring buffer */ - size_t dma_bytes; - unsigned char *dma_area; struct snd_pcm_substream *ss; @@ -99,6 +96,7 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) int status = req->status; struct uac_req *ur = req->context; struct snd_pcm_substream *substream; + struct snd_pcm_runtime *runtime; struct uac_rtd_params *prm = ur->pp; struct snd_uac_chip *uac = prm->uac; @@ -120,6 +118,7 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) if (!substream) goto exit; + runtime = substream->runtime; spin_lock_irqsave(&prm->lock, flags); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { @@ -156,29 +155,31 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) spin_unlock_irqrestore(&prm->lock, flags); /* Pack USB load in ALSA ring buffer */ - pending = prm->dma_bytes - hw_ptr; + pending = runtime->dma_bytes - hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (unlikely(pending < req->actual)) { - memcpy(req->buf, prm->dma_area + hw_ptr, pending); - memcpy(req->buf + pending, prm->dma_area, + memcpy(req->buf, runtime->dma_area + hw_ptr, pending); + memcpy(req->buf + pending, runtime->dma_area, req->actual - pending); } else { - memcpy(req->buf, prm->dma_area + hw_ptr, req->actual); + memcpy(req->buf, runtime->dma_area + hw_ptr, + req->actual); } } else { if (unlikely(pending < req->actual)) { - memcpy(prm->dma_area + hw_ptr, req->buf, pending); - memcpy(prm->dma_area, req->buf + pending, + memcpy(runtime->dma_area + hw_ptr, req->buf, pending); + memcpy(runtime->dma_area, req->buf + pending, req->actual - pending); } else { - memcpy(prm->dma_area + hw_ptr, req->buf, req->actual); + memcpy(runtime->dma_area + hw_ptr, req->buf, + req->actual); } } spin_lock_irqsave(&prm->lock, flags); /* update hw_ptr after data is copied to memory */ - prm->hw_ptr = (hw_ptr + req->actual) % prm->dma_bytes; + prm->hw_ptr = (hw_ptr + req->actual) % runtime->dma_bytes; spin_unlock_irqrestore(&prm->lock, flags); exit: @@ -260,11 +261,8 @@ static int uac_pcm_hw_params(struct snd_pcm_substream *substream, err = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params)); - if (err >= 0) { - prm->dma_bytes = substream->runtime->dma_bytes; - prm->dma_area = substream->runtime->dma_area; + if (err >= 0) prm->period_size = params_period_bytes(hw_params); - } return err; } @@ -279,8 +277,6 @@ static int uac_pcm_hw_free(struct snd_pcm_substream *substream) else prm = &uac->c_prm; - prm->dma_area = NULL; - prm->dma_bytes = 0; prm->period_size = 0; return snd_pcm_lib_free_pages(substream); -- GitLab From c7d18686e87a62167fa04472c4b0ca16e77acdc7 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 21 Jun 2018 17:22:50 +0200 Subject: [PATCH 1055/1291] usb: gadget: u_audio: remove cached period bytes value [ Upstream commit 773e53d50e227b0c03d0bb434c1636f6c49c75b2 ] Substream period size potentially can be changed in runtime, however this is not accounted in the data copying routine, the change replaces the cached value with an actual value from substream runtime. As a side effect the change also removes a potential division by zero in u_audio_iso_complete() function, if there is a race with uac_pcm_hw_free(), which sets prm->period_size to 0. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 40 ++++----------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 717656e17afa..e9644137f720 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -49,8 +49,6 @@ struct uac_rtd_params { void *rbuf; - size_t period_size; - unsigned max_psize; /* MaxPacketSize of endpoint */ struct uac_req *ureq; @@ -92,7 +90,6 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) unsigned pending; unsigned long flags; unsigned int hw_ptr; - bool update_alsa = false; int status = req->status; struct uac_req *ur = req->context; struct snd_pcm_substream *substream; @@ -145,11 +142,6 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) req->actual = req->length; } - pending = prm->hw_ptr % prm->period_size; - pending += req->actual; - if (pending >= prm->period_size) - update_alsa = true; - hw_ptr = prm->hw_ptr; spin_unlock_irqrestore(&prm->lock, flags); @@ -180,14 +172,15 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) spin_lock_irqsave(&prm->lock, flags); /* update hw_ptr after data is copied to memory */ prm->hw_ptr = (hw_ptr + req->actual) % runtime->dma_bytes; + hw_ptr = prm->hw_ptr; spin_unlock_irqrestore(&prm->lock, flags); + if ((hw_ptr % snd_pcm_lib_period_bytes(substream)) < req->actual) + snd_pcm_period_elapsed(substream); + exit: if (usb_ep_queue(ep, req, GFP_ATOMIC)) dev_err(uac->card->dev, "%d Error!\n", __LINE__); - - if (update_alsa) - snd_pcm_period_elapsed(substream); } static int uac_pcm_trigger(struct snd_pcm_substream *substream, int cmd) @@ -250,35 +243,12 @@ static snd_pcm_uframes_t uac_pcm_pointer(struct snd_pcm_substream *substream) static int uac_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params) { - struct snd_uac_chip *uac = snd_pcm_substream_chip(substream); - struct uac_rtd_params *prm; - int err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - prm = &uac->p_prm; - else - prm = &uac->c_prm; - - err = snd_pcm_lib_malloc_pages(substream, + return snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params)); - if (err >= 0) - prm->period_size = params_period_bytes(hw_params); - - return err; } static int uac_pcm_hw_free(struct snd_pcm_substream *substream) { - struct snd_uac_chip *uac = snd_pcm_substream_chip(substream); - struct uac_rtd_params *prm; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - prm = &uac->p_prm; - else - prm = &uac->c_prm; - - prm->period_size = 0; - return snd_pcm_lib_free_pages(substream); } -- GitLab From a362655deb0e15a55dafda8e875e0a1f1b97c92d Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 21 Jun 2018 17:22:52 +0200 Subject: [PATCH 1056/1291] usb: gadget: u_audio: protect stream runtime fields with stream spinlock [ Upstream commit 56bc61587daadef67712068f251c4ef2e3932d94 ] The change protects almost the whole body of u_audio_iso_complete() function by PCM stream lock, this is mainly sufficient to avoid a race between USB request completion and stream termination, the change prevents a possibility of invalid memory access in interrupt context by memcpy(): Unable to handle kernel paging request at virtual address 00004e80 pgd = c0004000 [00004e80] *pgd=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP ARM CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G C 3.14.54+ #117 task: da180b80 ti: da192000 task.ti: da192000 PC is at memcpy+0x50/0x330 LR is at 0xcdd92b0e pc : [] lr : [] psr: 20000193 sp : da193ce4 ip : dd86ae26 fp : 0000b180 r10: daf81680 r9 : 00000000 r8 : d58a01ea r7 : 2c0b43e4 r6 : acdfb08b r5 : 01a271cf r4 : 87389377 r3 : 69469782 r2 : 00000020 r1 : daf82fe0 r0 : 00004e80 Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 10c5387d Table: 2b70804a DAC: 00000015 Process ksoftirqd/0 (pid: 3, stack limit = 0xda192238) Also added a check for potential !runtime condition, commonly it is done by PCM_RUNTIME_CHECK(substream) in the beginning, however this does not completely prevent from oopses in u_audio_iso_complete(), because the proper protection scheme must be implemented in PCM library functions. An example of *not fixed* oops due to substream->runtime->* dereference by snd_pcm_running(substream) from snd_pcm_period_elapsed(), where substream->runtime is gone while waiting the substream lock: Unable to handle kernel paging request at virtual address 6b6b6b6b pgd = db7e4000 [6b6b6b6b] *pgd=00000000 CPU: 0 PID: 193 Comm: klogd Tainted: G C 3.14.54+ #118 task: db5ac500 ti: db60c000 task.ti: db60c000 PC is at snd_pcm_period_elapsed+0x48/0xd8 [snd_pcm] LR is at snd_pcm_period_elapsed+0x40/0xd8 [snd_pcm] pc : [<>] lr : [<>] psr: 60000193 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user Control: 10c5387d Table: 2b7e404a DAC: 00000015 Process klogd (pid: 193, stack limit = 0xdb60c238) [<>] (snd_pcm_period_elapsed [snd_pcm]) from [<>] (udc_irq+0x500/0xbbc) [<>] (udc_irq) from [<>] (ci_irq+0x280/0x304) [<>] (ci_irq) from [<>] (handle_irq_event_percpu+0xa4/0x40c) [<>] (handle_irq_event_percpu) from [<>] (handle_irq_event+0x3c/0x5c) [<>] (handle_irq_event) from [<>] (handle_fasteoi_irq+0xc4/0x110) [<>] (handle_fasteoi_irq) from [<>] (generic_handle_irq+0x20/0x30) [<>] (generic_handle_irq) from [<>] (handle_IRQ+0x80/0xc0) [<>] (handle_IRQ) from [<>] (gic_handle_irq+0x3c/0x60) [<>] (gic_handle_irq) from [<>] (__irq_svc+0x44/0x78) Signed-off-by: Vladimir Zapolskiy [erosca: W/o this patch, with minimal instrumentation [1], I can consistently reproduce BUG: KASAN: use-after-free [2]] [1] Instrumentation to reproduce issue [2]: # diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c # index a72295c953bb..bd0b308024fe 100644 # --- a/drivers/usb/gadget/function/u_audio.c # +++ b/drivers/usb/gadget/function/u_audio.c # @@ -16,6 +16,7 @@ # #include # #include # #include # +#include # # #include "u_audio.h" # # @@ -147,6 +148,8 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) # # spin_unlock_irqrestore(&prm->lock, flags); # # + udelay(500); //delay here to increase probability of parallel activities # + # /* Pack USB load in ALSA ring buffer */ # pending = prm->dma_bytes - hw_ptr; [2] After applying [1], below BUG occurs on Rcar-H3-Salvator-X board: ================================================================== BUG: KASAN: use-after-free in u_audio_iso_complete+0x24c/0x520 [u_audio] Read of size 8 at addr ffff8006cafcc248 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G WC 4.14.47+ #160 Hardware name: Renesas Salvator-X board based on r8a7795 ES2.0+ (DT) Call trace: [] dump_backtrace+0x0/0x364 [] show_stack+0x14/0x1c [] dump_stack+0x108/0x174 [] print_address_description+0x7c/0x32c [] kasan_report+0x324/0x354 [] __asan_load8+0x24/0x94 [] u_audio_iso_complete+0x24c/0x520 [u_audio] [] usb_gadget_giveback_request+0x480/0x4d0 [udc_core] [] usbhsg_queue_done+0x100/0x130 [renesas_usbhs] [] usbhsf_pkt_handler+0x1a4/0x298 [renesas_usbhs] [] usbhsf_irq_ready+0x128/0x178 [renesas_usbhs] [] usbhs_interrupt+0x440/0x490 [renesas_usbhs] [] __handle_irq_event_percpu+0x594/0xa58 [] handle_irq_event_percpu+0x84/0x12c [] handle_irq_event+0xb0/0x10c [] handle_fasteoi_irq+0x1e0/0x2ec [] generic_handle_irq+0x2c/0x44 [] __handle_domain_irq+0x190/0x194 [] gic_handle_irq+0x80/0xac Exception stack(0xffff200009e97c80 to 0xffff200009e97dc0) 7c80: 0000000000000000 0000000000000000 0000000000000003 ffff200008179298 7ca0: ffff20000ae1c180 dfff200000000000 0000000000000000 ffff2000081f9a88 7cc0: ffff200009eb5960 ffff200009e97cf0 0000000000001600 ffff0400041b064b 7ce0: 0000000000000000 0000000000000002 0000000200000001 0000000000000001 7d00: ffff20000842197c 0000ffff958c4970 0000000000000000 ffff8006da0d5b80 7d20: ffff8006d4678498 0000000000000000 000000126bde0a8b ffff8006d4678480 7d40: 0000000000000000 000000126bdbea64 ffff200008fd0000 ffff8006fffff980 7d60: 00000000495f0018 ffff200009e97dc0 ffff200008b6c4ec ffff200009e97dc0 7d80: ffff200008b6c4f0 0000000020000145 ffff8006da0d5b80 ffff8006d4678498 7da0: ffffffffffffffff ffff8006d4678498 ffff200009e97dc0 ffff200008b6c4f0 [] el1_irq+0xb4/0x12c [] cpuidle_enter_state+0x818/0x844 [] cpuidle_enter+0x18/0x20 [] call_cpuidle+0x98/0x9c [] do_idle+0x214/0x264 [] cpu_startup_entry+0x20/0x24 [] rest_init+0x30c/0x320 [] start_kernel+0x570/0x5b0 ---<-snip->--- Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Eugeniu Rosca Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index e9644137f720..d3a639297e06 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -88,7 +88,7 @@ static const struct snd_pcm_hardware uac_pcm_hardware = { static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) { unsigned pending; - unsigned long flags; + unsigned long flags, flags2; unsigned int hw_ptr; int status = req->status; struct uac_req *ur = req->context; @@ -115,7 +115,14 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) if (!substream) goto exit; + snd_pcm_stream_lock_irqsave(substream, flags2); + runtime = substream->runtime; + if (!runtime || !snd_pcm_running(substream)) { + snd_pcm_stream_unlock_irqrestore(substream, flags2); + goto exit; + } + spin_lock_irqsave(&prm->lock, flags); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { @@ -174,6 +181,7 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) prm->hw_ptr = (hw_ptr + req->actual) % runtime->dma_bytes; hw_ptr = prm->hw_ptr; spin_unlock_irqrestore(&prm->lock, flags); + snd_pcm_stream_unlock_irqrestore(substream, flags2); if ((hw_ptr % snd_pcm_lib_period_bytes(substream)) < req->actual) snd_pcm_period_elapsed(substream); -- GitLab From 645fef5e8ddeac591556d9e13d0cf0ea50c6fca6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Jul 2018 10:37:37 -0700 Subject: [PATCH 1057/1291] usb/phy: fix PPC64 build errors in phy-fsl-usb.c [ Upstream commit a39ba90a1cc7010edb0a7132e1b67f3d80b994e9 ] Fix build errors when built for PPC64: These variables are only used on PPC32 so they don't need to be initialized for PPC64. ../drivers/usb/phy/phy-fsl-usb.c: In function 'usb_otg_start': ../drivers/usb/phy/phy-fsl-usb.c:865:3: error: '_fsl_readl' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_be; ../drivers/usb/phy/phy-fsl-usb.c:865:16: error: '_fsl_readl_be' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_be; ../drivers/usb/phy/phy-fsl-usb.c:866:3: error: '_fsl_writel' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_be; ../drivers/usb/phy/phy-fsl-usb.c:866:17: error: '_fsl_writel_be' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_be; ../drivers/usb/phy/phy-fsl-usb.c:868:16: error: '_fsl_readl_le' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_le; ../drivers/usb/phy/phy-fsl-usb.c:869:17: error: '_fsl_writel_le' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_le; and the sysfs "show" function return type should be ssize_t, not int: ../drivers/usb/phy/phy-fsl-usb.c:1042:49: error: initialization of 'ssize_t (*)(struct device *, struct device_attribute *, char *)' {aka 'long int (*)(struct device *, struct device_attribute *, char *)'} from incompatible pointer type 'int (*)(struct device *, struct device_attribute *, char *)' [-Werror=incompatible-pointer-types] static DEVICE_ATTR(fsl_usb2_otg_state, S_IRUGO, show_fsl_usb2_otg_state, NULL); Signed-off-by: Randy Dunlap Cc: Felipe Balbi Cc: linux-usb@vger.kernel.org Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-fsl-usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c index cf8f40ae6e01..9b4354a00ca7 100644 --- a/drivers/usb/phy/phy-fsl-usb.c +++ b/drivers/usb/phy/phy-fsl-usb.c @@ -874,6 +874,7 @@ int usb_otg_start(struct platform_device *pdev) if (pdata->init && pdata->init(pdev) != 0) return -EINVAL; +#ifdef CONFIG_PPC32 if (pdata->big_endian_mmio) { _fsl_readl = _fsl_readl_be; _fsl_writel = _fsl_writel_be; @@ -881,6 +882,7 @@ int usb_otg_start(struct platform_device *pdev) _fsl_readl = _fsl_readl_le; _fsl_writel = _fsl_writel_le; } +#endif /* request irq */ p_otg->irq = platform_get_irq(pdev, 0); @@ -971,7 +973,7 @@ int usb_otg_start(struct platform_device *pdev) /* * state file in sysfs */ -static int show_fsl_usb2_otg_state(struct device *dev, +static ssize_t show_fsl_usb2_otg_state(struct device *dev, struct device_attribute *attr, char *buf) { struct otg_fsm *fsm = &fsl_otg_dev->fsm; -- GitLab From 7cd80fc138f289aa25fba5dffc1542e4111f34d1 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 10 Jul 2018 16:01:45 +0200 Subject: [PATCH 1058/1291] tools: usb: ffs-test: Fix build on big endian systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a2b22dddc7bb6110ac3b5ed1a60aa9279836fadb ] The tools/usb/ffs-test.c file defines cpu_to_le16/32 by using the C library htole16/32 function calls. However, cpu_to_le16/32 are used when initializing structures, i.e in a context where a function call is not allowed. It works fine on little endian systems because htole16/32 are defined by the C library as no-ops. But on big-endian systems, they are actually doing something, which might involve calling a function, causing build failures, such as: ffs-test.c:48:25: error: initializer element is not constant #define cpu_to_le32(x) htole32(x) ^~~~~~~ ffs-test.c:128:12: note: in expansion of macro ‘cpu_to_le32’ .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2), ^~~~~~~~~~~ To solve this, we code cpu_to_le16/32 in a way that allows them to be used when initializing structures. This fix was imported from meta-openembedded/android-tools/fix-big-endian-build.patch written by Thomas Petazzoni . CC: Thomas Petazzoni Signed-off-by: Peter Senna Tschudin Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/ffs-test.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index 95dd14648ba5..0f395dfb7774 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -44,12 +44,25 @@ /******************** Little Endian Handling ********************************/ -#define cpu_to_le16(x) htole16(x) -#define cpu_to_le32(x) htole32(x) +/* + * cpu_to_le16/32 are used when initializing structures, a context where a + * function call is not allowed. To solve this, we code cpu_to_le16/32 in a way + * that allows them to be used when initializing structures. + */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define cpu_to_le16(x) (x) +#define cpu_to_le32(x) (x) +#else +#define cpu_to_le16(x) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)) +#define cpu_to_le32(x) \ + ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \ + (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24)) +#endif + #define le32_to_cpu(x) le32toh(x) #define le16_to_cpu(x) le16toh(x) - /******************** Messages and Errors ***********************************/ static const char argv0[] = "ffs-test"; -- GitLab From 70e88fef36ec16b25ba2f88258e4bd38990cbd61 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Mon, 2 Jul 2018 23:46:47 +0200 Subject: [PATCH 1059/1291] usb: gadget: f_uac2: fix endianness of 'struct cntrl_*_lay3' [ Upstream commit eec24f2a0d4dc3b1d95a3ccd2feb523ede3ba775 ] The list [1] of commits doing endianness fixes in USB subsystem is long due to below quote from USB spec Revision 2.0 from April 27, 2000: ------------ 8.1 Byte/Bit Ordering Multiple byte fields in standard descriptors, requests, and responses are interpreted as and moved over the bus in little-endian order, i.e. LSB to MSB. ------------ This commit belongs to the same family. [1] Example of endianness fixes in USB subsystem: commit 14e1d56cbea6 ("usb: gadget: f_uac2: endianness fixes.") commit 42370b821168 ("usb: gadget: f_uac1: endianness fixes.") commit 63afd5cc7877 ("USB: chaoskey: fix Alea quirk on big-endian hosts") commit 74098c4ac782 ("usb: gadget: acm: fix endianness in notifications") commit cdd7928df0d2 ("ACM gadget: fix endianness in notifications") commit 323ece54e076 ("cdc-wdm: fix endianness bug in debug statements") commit e102609f1072 ("usb: gadget: uvc: Fix endianness mismatches") list goes on Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Eugeniu Rosca Reviewed-by: Ruslan Bilovol Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index c6ce178775b6..d063f0401f84 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -442,14 +442,14 @@ static struct usb_descriptor_header *hs_audio_desc[] = { }; struct cntrl_cur_lay3 { - __u32 dCUR; + __le32 dCUR; }; struct cntrl_range_lay3 { - __u16 wNumSubRanges; - __u32 dMIN; - __u32 dMAX; - __u32 dRES; + __le16 wNumSubRanges; + __le32 dMIN; + __le32 dMAX; + __le32 dRES; } __packed; static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, @@ -707,9 +707,9 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr) memset(&c, 0, sizeof(struct cntrl_cur_lay3)); if (entity_id == USB_IN_CLK_ID) - c.dCUR = p_srate; + c.dCUR = cpu_to_le32(p_srate); else if (entity_id == USB_OUT_CLK_ID) - c.dCUR = c_srate; + c.dCUR = cpu_to_le32(c_srate); value = min_t(unsigned, w_length, sizeof c); memcpy(req->buf, &c, value); @@ -746,15 +746,15 @@ in_rq_range(struct usb_function *fn, const struct usb_ctrlrequest *cr) if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) { if (entity_id == USB_IN_CLK_ID) - r.dMIN = p_srate; + r.dMIN = cpu_to_le32(p_srate); else if (entity_id == USB_OUT_CLK_ID) - r.dMIN = c_srate; + r.dMIN = cpu_to_le32(c_srate); else return -EOPNOTSUPP; r.dMAX = r.dMIN; r.dRES = 0; - r.wNumSubRanges = 1; + r.wNumSubRanges = cpu_to_le16(1); value = min_t(unsigned, w_length, sizeof r); memcpy(req->buf, &r, value); -- GitLab From a685c4c4d6e8bd6c3e4b39330e5d09f2446de8b9 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 10 Jul 2018 23:21:08 +0900 Subject: [PATCH 1060/1291] netfilter: nft_set_hash: add rcu_barrier() in the nft_rhash_destroy() [ Upstream commit 9970a8e40d4c39e23d62d32540366d1d7d2cce9b ] GC of set uses call_rcu() to destroy elements. So that elements would be destroyed after destroying sets and chains. But, elements should be destroyed before destroying sets and chains. In order to wait calling call_rcu(), a rcu_barrier() is added. In order to test correctly, below patch should be applied. https://patchwork.ozlabs.org/patch/940883/ test scripts: %cat test.nft table ip aa { map map1 { type ipv4_addr : verdict; flags timeout; elements = { 0 : jump a0, 1 : jump a0, 2 : jump a0, 3 : jump a0, 4 : jump a0, 5 : jump a0, 6 : jump a0, 7 : jump a0, 8 : jump a0, 9 : jump a0, } timeout 1s; } chain a0 { } } flush ruleset [ ... ] table ip aa { map map1 { type ipv4_addr : verdict; flags timeout; elements = { 0 : jump a0, 1 : jump a0, 2 : jump a0, 3 : jump a0, 4 : jump a0, 5 : jump a0, 6 : jump a0, 7 : jump a0, 8 : jump a0, 9 : jump a0, } timeout 1s; } chain a0 { } } flush ruleset Splat looks like: [ 200.795603] kernel BUG at net/netfilter/nf_tables_api.c:1363! [ 200.806944] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 200.812253] CPU: 1 PID: 1582 Comm: nft Not tainted 4.17.0+ #24 [ 200.820297] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [ 200.830309] RIP: 0010:nf_tables_chain_destroy.isra.34+0x62/0x240 [nf_tables] [ 200.838317] Code: 43 50 85 c0 74 26 48 8b 45 00 48 8b 4d 08 ba 54 05 00 00 48 c7 c6 60 6d 29 c0 48 c7 c7 c0 65 29 c0 4c 8b 40 08 e8 58 e5 fd f8 <0f> 0b 48 89 da 48 b8 00 00 00 00 00 fc ff [ 200.860366] RSP: 0000:ffff880118dbf4d0 EFLAGS: 00010282 [ 200.866354] RAX: 0000000000000061 RBX: ffff88010cdeaf08 RCX: 0000000000000000 [ 200.874355] RDX: 0000000000000061 RSI: 0000000000000008 RDI: ffffed00231b7e90 [ 200.882361] RBP: ffff880118dbf4e8 R08: ffffed002373bcfb R09: ffffed002373bcfa [ 200.890354] R10: 0000000000000000 R11: ffffed002373bcfb R12: dead000000000200 [ 200.898356] R13: dead000000000100 R14: ffffffffbb62af38 R15: dffffc0000000000 [ 200.906354] FS: 00007fefc31fd700(0000) GS:ffff88011b800000(0000) knlGS:0000000000000000 [ 200.915533] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.922355] CR2: 0000557f1c8e9128 CR3: 0000000106880000 CR4: 00000000001006e0 [ 200.930353] Call Trace: [ 200.932351] ? nf_tables_commit+0x26f6/0x2c60 [nf_tables] [ 200.939525] ? nf_tables_setelem_notify.constprop.49+0x1a0/0x1a0 [nf_tables] [ 200.947525] ? nf_tables_delchain+0x6e0/0x6e0 [nf_tables] [ 200.952383] ? nft_add_set_elem+0x1700/0x1700 [nf_tables] [ 200.959532] ? nla_parse+0xab/0x230 [ 200.963529] ? nfnetlink_rcv_batch+0xd06/0x10d0 [nfnetlink] [ 200.968384] ? nfnetlink_net_init+0x130/0x130 [nfnetlink] [ 200.975525] ? debug_show_all_locks+0x290/0x290 [ 200.980363] ? debug_show_all_locks+0x290/0x290 [ 200.986356] ? sched_clock_cpu+0x132/0x170 [ 200.990352] ? find_held_lock+0x39/0x1b0 [ 200.994355] ? sched_clock_local+0x10d/0x130 [ 200.999531] ? memset+0x1f/0x40 Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_hash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9c0d5a7ce5f9..33aa2ac3a62e 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -359,6 +359,7 @@ static void nft_rhash_destroy(const struct nft_set *set) struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); + rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } -- GitLab From e3476a6da5d87215ca6e1c9629fec6b5e73c785d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 19 Jul 2018 18:18:35 +0200 Subject: [PATCH 1061/1291] bpf, ppc64: fix unexpected r0=0 exit path inside bpf_xadd [ Upstream commit b9c1e60e7bf4e64ac1b4f4d6d593f0bb57886973 ] None of the JITs is allowed to implement exit paths from the BPF insn mappings other than BPF_JMP | BPF_EXIT. In the BPF core code we have a couple of rewrites in eBPF (e.g. LD_ABS / LD_IND) and in eBPF to cBPF translation to retain old existing behavior where exceptions may occur; they are also tightly controlled by the verifier where it disallows some of the features such as BPF to BPF calls when legacy LD_ABS / LD_IND ops are present in the BPF program. During recent review of all BPF_XADD JIT implementations I noticed that the ppc64 one is buggy in that it contains two jumps to exit paths. This is problematic as this can bypass verifier expectations e.g. pointed out in commit f6b1b3bf0d5f ("bpf: fix subprog verifier bypass by div/mod by 0 exception"). The first exit path is obsoleted by the fix in ca36960211eb ("bpf: allow xadd only on aligned memory") anyway, and for the second one we need to do a fetch, add and store loop if the reservation from lwarx/ldarx was lost in the meantime. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Reviewed-by: Naveen N. Rao Reviewed-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/net/bpf_jit_comp64.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 254634fb3fc7..fee1e1f8c9d3 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -322,6 +322,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u64 imm64; u8 *func; u32 true_cond; + u32 tmp_idx; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -681,11 +682,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not word-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); + tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); /* add value from src_reg into this */ @@ -693,32 +690,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* store result back */ PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); - /* otherwise, let's try once more */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - /* exit if the store was not successful */ - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not doubleword-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); + tmp_idx = ctx->idx * 4; PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* -- GitLab From 4a0144a43c526e6d8d274fce2e4241854ef8712c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Jul 2018 07:17:55 +0200 Subject: [PATCH 1062/1291] netfilter: nf_tables: fix memory leaks on chain rename [ Upstream commit 9f8aac0be21ed5f99bd5ba0ff315d710737d1794 ] The new name is stored in the transaction metadata, on commit, the pointers to the old and new names are swapped. Therefore in abort and commit case we have to free the pointer in the chain_trans container. In commit case, the pointer can be used by another cpu that is currently dumping the renamed chain, thus kfree needs to happen after waiting for rcu readers to complete. Fixes: b7263e071a ("netfilter: nf_tables: Allow chain name of up to 255 chars") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9a945024a0b6..194674c96b98 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5043,6 +5043,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); break; + case NFT_MSG_NEWCHAIN: + kfree(nft_trans_chain_name(trans)); + break; case NFT_MSG_DELCHAIN: nf_tables_chain_destroy(trans->ctx.chain); break; @@ -5100,13 +5103,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); break; case NFT_MSG_NEWCHAIN: - if (nft_trans_chain_update(trans)) + if (nft_trans_chain_update(trans)) { nft_chain_commit_update(trans); - else + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + /* trans destroyed after rcu grace period */ + } else { nft_clear(net, trans->ctx.chain); - - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); - nft_trans_destroy(trans); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nft_trans_destroy(trans); + } break; case NFT_MSG_DELCHAIN: list_del_rcu(&trans->ctx.chain->list); @@ -5246,7 +5251,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { free_percpu(nft_trans_chain_stats(trans)); - + kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; -- GitLab From 123534dbd49094b9f68e53fffd8cbd8a9f9f976e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Jul 2018 07:17:56 +0200 Subject: [PATCH 1063/1291] netfilter: nf_tables: don't allow to rename to already-pending name [ Upstream commit c6cc94df65c3174be92afbee638f11cbb5e606a7 ] Its possible to rename two chains to the same name in one transaction: nft add chain t c1 nft add chain t c2 nft 'rename chain t c1 c3;rename chain t c2 c3' This creates two chains named 'c3'. Appears to be harmless, both chains can still be deleted both by name or handle, but, nevertheless, its a bug. Walk transaction log and also compare vs. the pending renames. Both chains can still be deleted, but nevertheless it is a bug as we don't allow to create chains with identical names, so we should prevent this from happening-by-rename too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 42 ++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 194674c96b98..742aacb317e5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1480,7 +1480,6 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_base_chain *basechain; struct nft_stats *stats = NULL; struct nft_chain_hook hook; - const struct nlattr *name; struct nf_hook_ops *ops; struct nft_trans *trans; int err, i; @@ -1531,12 +1530,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return PTR_ERR(stats); } + err = -ENOMEM; trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); - if (trans == NULL) { - free_percpu(stats); - return -ENOMEM; - } + if (trans == NULL) + goto err; nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -1546,19 +1544,37 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, else nft_trans_chain_policy(trans) = -1; - name = nla[NFTA_CHAIN_NAME]; - if (nla[NFTA_CHAIN_HANDLE] && name) { - nft_trans_chain_name(trans) = - nla_strdup(name, GFP_KERNEL); - if (!nft_trans_chain_name(trans)) { - kfree(trans); - free_percpu(stats); - return -ENOMEM; + if (nla[NFTA_CHAIN_HANDLE] && + nla[NFTA_CHAIN_NAME]) { + struct nft_trans *tmp; + char *name; + + err = -ENOMEM; + name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); + if (!name) + goto err; + + err = -EEXIST; + list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + if (tmp->msg_type == NFT_MSG_NEWCHAIN && + tmp->ctx.table == table && + nft_trans_chain_update(tmp) && + nft_trans_chain_name(tmp) && + strcmp(name, nft_trans_chain_name(tmp)) == 0) { + kfree(name); + goto err; + } } + + nft_trans_chain_name(trans) = name; } list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; +err: + free_percpu(stats); + kfree(trans); + return err; } static int nf_tables_newchain(struct net *net, struct sock *nlsk, -- GitLab From 10ca6b3f92d3907a4b25ab198c7b5c84aefa3bc7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 19 Jul 2018 10:31:00 -0700 Subject: [PATCH 1064/1291] KVM: vmx: use local variable for current_vmptr when emulating VMPTRST [ Upstream commit 0a06d4256674c4e041945b52044941995fee237d ] Do not expose the address of vmx->nested.current_vmptr to kvm_write_guest_virt_system() as the resulting __copy_to_user() call will trigger a WARN when CONFIG_HARDENED_USERCOPY is enabled. Opportunistically clean up variable names in handle_vmptrst() to improve readability, e.g. vmcs_gva is misleading as the memory operand of VMPTRST is plain memory, not a VMCS. Signed-off-by: Sean Christopherson Tested-by: Peter Shier Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f015ca3997d9..31bb200eb00c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8108,21 +8108,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - gva_t vmcs_gva; + unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; + gva_t gva; if (!nested_vmx_check_permission(vcpu)) return 1; - if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, &vmcs_gva)) + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, vmcs_gva, - (void *)&to_vmx(vcpu)->nested.current_vmptr, - sizeof(u64), &e)) { + if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, + sizeof(gpa_t), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } -- GitLab From a10170d94ed4954f62e2cf73ce7cf5c531790163 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 20 Jul 2018 14:47:03 -0400 Subject: [PATCH 1065/1291] tools/power turbostat: fix -S on UP systems [ Upstream commit 9d83601a9cc1884d1b5706ee2acc661d558c6838 ] The -S (system summary) option failed to print any data on a 1-processor system. Reported-by: Artem Bityutskiy Signed-off-by: Len Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bd9c6b31a504..b28de3ad3907 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1038,9 +1038,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ if (!printed || !summary_only) print_header("\t"); - if (topo.num_cpus > 1) - format_counters(&average.threads, &average.cores, - &average.packages); + format_counters(&average.threads, &average.cores, &average.packages); printed = 1; -- GitLab From b970d8a1c213d8f030feddb3cbf934df4302eb47 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 19 Jul 2018 10:27:13 +0800 Subject: [PATCH 1066/1291] net: caif: Add a missing rcu_read_unlock() in caif_flow_cb [ Upstream commit 64119e05f7b31e83e2555f6782e6cdc8f81c63f4 ] Add a missing rcu_read_unlock in the error path Fixes: c95567c80352 ("caif: added check for potential null return") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 2d38b6e34203..98b62a7990aa 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb) caifd = caif_get(skb->dev); WARN_ON(caifd == NULL); - if (caifd == NULL) + if (!caifd) { + rcu_read_unlock(); return; + } caifd_hold(caifd); rcu_read_unlock(); -- GitLab From 77c65d5f40c6979244eee3d4f51fa2a213c73307 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 18 Jul 2018 22:50:02 -0700 Subject: [PATCH 1067/1291] qed: Fix link flap issue due to mismatching EEE capabilities. [ Upstream commit 4ad95a93a702ec4f4fb5159b822797ba67b8cbbe ] Apparently, MFW publishes EEE capabilities even for Fiber-boards that don't support them, and later since qed internally sets adv_caps it would cause link-flap avoidance (LFA) to fail when driver would initiate the link. This in turn delays the link, causing traffic to fail. Driver has been modified to not to ask MFW for any EEE config if EEE isn't to be enabled. Fixes: 645874e5 ("qed: Add support for Energy efficient ethernet.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 376485d99357..c0674bcd9a83 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1279,9 +1279,15 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0; phy_cfg.adv_speed = params->speed.advertised_speeds; phy_cfg.loopback_mode = params->loopback_mode; - if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) { - if (params->eee.enable) - phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; + + /* There are MFWs that share this capability regardless of whether + * this is feasible or not. And given that at the very least adv_caps + * would be set internally by qed, we want to make sure LFA would + * still work. + */ + if ((p_hwfn->mcp_info->capabilities & + FW_MB_PARAM_FEATURE_SUPPORT_EEE) && params->eee.enable) { + phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; if (params->eee.tx_lpi_enable) phy_cfg.eee_cfg |= EEE_CFG_TX_LPI; if (params->eee.adv_caps & QED_EEE_1G_ADV) -- GitLab From f4e284f1db9e62e2506c08ef59a4f0810b99b2f2 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 18 Jul 2018 22:50:03 -0700 Subject: [PATCH 1068/1291] qed: Fix possible race for the link state value. [ Upstream commit 58874c7b246109d8efb2b0099d1aa296d6bfc3fa ] There's a possible race where driver can read link status in mid-transition and see that virtual-link is up yet speed is 0. Since in this mid-transition we're guaranteed to see a mailbox from MFW soon, we can afford to treat this as link down. Fixes: cc875c2e ("qed: Add link support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index c0674bcd9a83..3c469355f5a4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1182,6 +1182,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, break; default: p_link->speed = 0; + p_link->link_up = 0; } if (p_link->link_up && p_link->speed) -- GitLab From ffb34418ca94f78feb84bf87317c679fcd7ed86b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 18 Jul 2018 22:50:04 -0700 Subject: [PATCH 1069/1291] qed: Correct Multicast API to reflect existence of 256 approximate buckets. [ Upstream commit 25c020a90919632b3425c19dc09188d56b9ed59a ] FW hsi contains 256 approximation buckets which are split in ramrod into eight u32 values, but driver is using eight 'unsigned long' variables. This patch fixes the mcast logic by making the API utilize u32. Fixes: 83aeb933 ("qed*: Trivial modifications") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 15 +++++++-------- drivers/net/ethernet/qlogic/qed/qed_l2.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- drivers/net/ethernet/qlogic/qed/qed_vf.h | 7 ++++++- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index c5452b445c37..83c1c4fa102b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -663,7 +663,7 @@ qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn, p_ramrod->common.update_approx_mcast_flg = 1; for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { - u32 *p_bins = (u32 *)p_params->bins; + u32 *p_bins = p_params->bins; p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]); } @@ -1474,8 +1474,8 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data) { - unsigned long bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; struct vport_update_ramrod_data *p_ramrod = NULL; + u32 bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; u8 abs_vport_id = 0; @@ -1511,26 +1511,25 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, /* explicitly clear out the entire vector */ memset(&p_ramrod->approx_mcast.bins, 0, sizeof(p_ramrod->approx_mcast.bins)); - memset(bins, 0, sizeof(unsigned long) * - ETH_MULTICAST_MAC_BINS_IN_REGS); + memset(bins, 0, sizeof(bins)); /* filter ADD op is explicit set op and it removes * any existing filters for the vport */ if (p_filter_cmd->opcode == QED_FILTER_ADD) { for (i = 0; i < p_filter_cmd->num_mc_addrs; i++) { - u32 bit; + u32 bit, nbits; bit = qed_mcast_bin_from_mac(p_filter_cmd->mac[i]); - __set_bit(bit, bins); + nbits = sizeof(u32) * BITS_PER_BYTE; + bins[bit / nbits] |= 1 << (bit % nbits); } /* Convert to correct endianity */ for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { struct vport_update_ramrod_mcast *p_ramrod_bins; - u32 *p_bins = (u32 *)bins; p_ramrod_bins = &p_ramrod->approx_mcast; - p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]); + p_ramrod_bins->bins[i] = cpu_to_le32(bins[i]); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index cc1f248551c9..91d383f3a661 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -214,7 +214,7 @@ struct qed_sp_vport_update_params { u8 anti_spoofing_en; u8 update_accept_any_vlan_flg; u8 accept_any_vlan; - unsigned long bins[8]; + u32 bins[8]; struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index d08fe350ab6c..c6411158afd7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2826,7 +2826,7 @@ qed_iov_vp_update_mcast_bin_param(struct qed_hwfn *p_hwfn, p_data->update_approx_mcast_flg = 1; memcpy(p_data->bins, p_mcast_tlv->bins, - sizeof(unsigned long) * ETH_MULTICAST_MAC_BINS_IN_REGS); + sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS); *tlvs_mask |= 1 << QED_IOV_VP_UPDATE_MCAST; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 91b5e9f02a62..6eb85db69f9a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1126,7 +1126,7 @@ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn, resp_size += sizeof(struct pfvf_def_resp_tlv); memcpy(p_mcast_tlv->bins, p_params->bins, - sizeof(unsigned long) * ETH_MULTICAST_MAC_BINS_IN_REGS); + sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS); } update_rx = p_params->accept_flags.update_rx_mode_config; @@ -1272,7 +1272,7 @@ void qed_vf_pf_filter_mcast(struct qed_hwfn *p_hwfn, u32 bit; bit = qed_mcast_bin_from_mac(p_filter_cmd->mac[i]); - __set_bit(bit, sp_params.bins); + sp_params.bins[bit / 32] |= 1 << (bit % 32); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 97d44dfb38ca..1e93c712fa34 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -392,7 +392,12 @@ struct vfpf_vport_update_mcast_bin_tlv { struct channel_tlv tl; u8 padding[4]; - u64 bins[8]; + /* There are only 256 approx bins, and in HSI they're divided into + * 32-bit values. As old VFs used to set-bit to the values on its side, + * the upper half of the array is never expected to contain any data. + */ + u64 bins[4]; + u64 obsolete_bins[4]; }; struct vfpf_vport_update_accept_param_tlv { -- GitLab From 5803ce5effc933f0bc142cbb78c8b02931bd801c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 Jul 2018 19:30:57 +0200 Subject: [PATCH 1070/1291] atl1c: reserve min skb headroom [ Upstream commit 6e56830776828d8ca9897fc4429eeab47c3bb432 ] Got crash report with following backtrace: BUG: unable to handle kernel paging request at ffff8801869daffe RIP: 0010:[] [] ip6_finish_output2+0x394/0x4c0 RSP: 0018:ffff880186c83a98 EFLAGS: 00010283 RAX: ffff8801869db00e ... [] ip6_finish_output+0x8c/0xf0 [] ip6_output+0x57/0x100 [] ip6_forward+0x4b9/0x840 [] ip6_rcv_finish+0x66/0xc0 [] ipv6_rcv+0x319/0x530 [] netif_receive_skb+0x1c/0x70 [] atl1c_clean+0x1ec/0x310 [atl1c] ... The bad access is in neigh_hh_output(), at skb->data - 16 (HH_DATA_MOD). atl1c driver provided skb with no headroom, so 14 bytes (ethernet header) got pulled, but then 16 are copied. Reserve NET_SKB_PAD bytes headroom, like netdev_alloc_skb(). Compile tested only; I lack hardware. Fixes: 7b7017642199 ("atl1c: Fix misuse of netdev_alloc_skb in refilling rx ring") Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 8c9986f3fc01..3615c2a06fda 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1685,6 +1685,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) skb = build_skb(page_address(page) + adapter->rx_page_offset, adapter->rx_frag_size); if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); adapter->rx_page_offset += adapter->rx_frag_size; if (adapter->rx_page_offset >= PAGE_SIZE) adapter->rx_page = NULL; -- GitLab From 06ab427345295831dc446df01f971d9235d0dac0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 21 Jul 2018 12:59:25 -0700 Subject: [PATCH 1071/1291] net: prevent ISA drivers from building on PPC32 [ Upstream commit c9ce1fa1c24b08e13c2a3b5b1f94a19c9eaa982c ] Prevent drivers from building on PPC32 if they use isa_bus_to_virt(), isa_virt_to_bus(), or isa_page_to_bus(), which are not available and thus cause build errors. ../drivers/net/ethernet/3com/3c515.c: In function 'corkscrew_open': ../drivers/net/ethernet/3com/3c515.c:824:9: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/amd/lance.c: In function 'lance_rx': ../drivers/net/ethernet/amd/lance.c:1203:23: error: implicit declaration of function 'isa_bus_to_virt'; did you mean 'bus_to_virt'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/amd/ni65.c: In function 'ni65_init_lance': ../drivers/net/ethernet/amd/ni65.c:585:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/cirrus/cs89x0.c: In function 'net_open': ../drivers/net/ethernet/cirrus/cs89x0.c:897:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] Signed-off-by: Randy Dunlap Suggested-by: Michael Ellerman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/3com/Kconfig | 2 +- drivers/net/ethernet/amd/Kconfig | 4 ++-- drivers/net/ethernet/cirrus/Kconfig | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 5b7658bcf020..5c3ef9fc8207 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -32,7 +32,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" - depends on ISA && ISA_DMA_API + depends on ISA && ISA_DMA_API && !PPC32 ---help--- If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index d5c15e8bb3de..a8e8f4e9c1bb 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -44,7 +44,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -138,7 +138,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index 5ab912937aff..ec0b545197e2 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -19,6 +19,7 @@ if NET_VENDOR_CIRRUS config CS89x0 tristate "CS89x0 support" depends on ISA || EISA || ARM + depends on !PPC32 ---help--- Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the file -- GitLab From 0b14a856f918fdfb70b234900fb9ecf92311b368 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 9 Jul 2018 21:16:40 +0200 Subject: [PATCH 1072/1291] can: mpc5xxx_can: check of_iomap return before use [ Upstream commit b5c1a23b17e563b656cc9bb76ce5323b997d90e8 ] of_iomap() can return NULL so that return needs to be checked and NULL treated as failure. While at it also take care of the missing of_node_put() in the error path. Signed-off-by: Nicholas Mc Guire Fixes: commit afa17a500a36 ("net/can: add driver for mscan family & mpc52xx_mscan") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/mscan/mpc5xxx_can.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index c7427bdd3a4b..2949a381a94d 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -86,6 +86,11 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev, return 0; } cdm = of_iomap(np_cdm, 0); + if (!cdm) { + of_node_put(np_cdm); + dev_err(&ofdev->dev, "can't map clock node!\n"); + return 0; + } if (in_8(&cdm->ipb_clk_sel) & 0x1) freq *= 2; -- GitLab From 562d7bc6c966ad7f66a85f63843a548ffd1eb8ce Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Tue, 3 Jul 2018 16:47:10 +0530 Subject: [PATCH 1073/1291] can: m_can: Move accessing of message ram to after clocks are enabled [ Upstream commit 54e4a0c486041dc1c20593d997fafd67089e8408 ] MCAN message ram should only be accessed once clocks are enabled. Therefore, move the call to parse/init the message ram to after clocks are enabled. Signed-off-by: Faiz Abbas Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ca3fa82316c2..d3ce904e929e 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1637,8 +1637,6 @@ static int m_can_plat_probe(struct platform_device *pdev) priv->can.clock.freq = clk_get_rate(cclk); priv->mram_base = mram_addr; - m_can_of_parse_mram(priv, mram_config_vals); - platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); @@ -1649,6 +1647,8 @@ static int m_can_plat_probe(struct platform_device *pdev) goto failed_free_dev; } + m_can_of_parse_mram(priv, mram_config_vals); + devm_can_led_init(dev); dev_info(&pdev->dev, "%s device registered (irq=%d, version=%d)\n", @@ -1698,8 +1698,6 @@ static __maybe_unused int m_can_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - m_can_init_ram(priv); - priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(ndev)) { @@ -1709,6 +1707,7 @@ static __maybe_unused int m_can_resume(struct device *dev) if (ret) return ret; + m_can_init_ram(priv); m_can_start(ndev); netif_device_attach(ndev); netif_start_queue(ndev); -- GitLab From 385b40b4fc1b54ade551c4fa56ece51364ab323e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 13 Jul 2018 17:20:17 +0200 Subject: [PATCH 1074/1291] i2c: davinci: Avoid zero value of CLKH [ Upstream commit cc8de9a68599b261244ea453b38678229f06ada7 ] If CLKH is set to 0 I2C clock is not generated at all, so avoid this value and stretch the clock in this case. Signed-off-by: Alexander Sverdlin Acked-by: Sekhar Nori Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-davinci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index b8c43535f16c..5cf670f57be7 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -234,12 +234,16 @@ static void i2c_davinci_calc_clk_dividers(struct davinci_i2c_dev *dev) /* * It's not always possible to have 1 to 2 ratio when d=7, so fall back * to minimal possible clkh in this case. + * + * Note: + * CLKH is not allowed to be 0, in this case I2C clock is not generated + * at all */ - if (clk >= clkl + d) { + if (clk > clkl + d) { clkh = clk - clkl - d; clkl -= d; } else { - clkh = 0; + clkh = 1; clkl = clk - (d << 1); } -- GitLab From 00f795e12b8bfbc385ba167446098c227c300183 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Jul 2018 10:39:07 +0200 Subject: [PATCH 1075/1291] perf/x86/amd/ibs: Don't access non-started event [ Upstream commit d2753e6b4882a637a0e8fb3b9c2e15f33265300e ] Paul Menzel reported the following bug: > Enabling the undefined behavior sanitizer and building GNU/Linux 4.18-rc5+ > (with some unrelated commits) with GCC 8.1.0 from Debian Sid/unstable, the > warning below is shown. > > > [ 2.111913] > > ================================================================================ > > [ 2.111917] UBSAN: Undefined behaviour in arch/x86/events/amd/ibs.c:582:24 > > [ 2.111919] member access within null pointer of type 'struct perf_event' > > [ 2.111926] CPU: 0 PID: 144 Comm: udevadm Not tainted 4.18.0-rc5-00316-g4864b68cedf2 #104 > > [ 2.111928] Hardware name: ASROCK E350M1/E350M1, BIOS TIMELESS 01/01/1970 > > [ 2.111930] Call Trace: > > [ 2.111943] dump_stack+0x55/0x89 > > [ 2.111949] ubsan_epilogue+0xb/0x33 > > [ 2.111953] handle_null_ptr_deref+0x7f/0x90 > > [ 2.111958] __ubsan_handle_type_mismatch_v1+0x55/0x60 > > [ 2.111964] perf_ibs_handle_irq+0x596/0x620 The code dereferences event before checking the STARTED bit. Patch below should cure the issue. The warning should not trigger, if I analyzed the thing correctly. (And Paul's testing confirms this.) Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Menzel Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807200958390.1580@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 786fd875de92..8c51844694e2 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); struct perf_event *event = pcpu->event; - struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event *hwc; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; @@ -602,6 +602,10 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) return 0; } + if (WARN_ON_ONCE(!event)) + goto fail; + + hwc = &event->hw; msr = hwc->config_base; buf = ibs_data.regs; rdmsrl(msr, *buf); -- GitLab From 1875957f2ec4587b87e0a0c32efe47958a88abe6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 23 Jul 2018 14:39:33 -0700 Subject: [PATCH 1076/1291] media: staging: omap4iss: Include asm/cacheflush.h after generic includes [ Upstream commit 0894da849f145af51bde88a6b84f95b9c9e0bc66 ] Including asm/cacheflush.h first results in the following build error when trying to build sparc32:allmodconfig, because 'struct page' has not been declared, and the function declaration ends up creating a separate (private) declaration of struct page (as a result of function arguments being in the scope of the function declaration and definition, not in global scope). The C scoping rules do not just affect variable visibility, they also affect type declaration visibility. The end result is that when the actual call site is seen in , the 'struct page' type in the caller is not the same 'struct page' that the function was declared with, resulting in: In file included from arch/sparc/include/asm/page.h:10:0, ... from drivers/staging/media/omap4iss/iss_video.c:15: include/linux/highmem.h: In function 'clear_user_highpage': include/linux/highmem.h:137:31: error: passing argument 1 of 'sparc_flush_page_to_ram' from incompatible pointer type Include generic includes files first to fix the problem. Fixes: fc96d58c10162 ("[media] v4l: omap4iss: Add support for OMAP4 camera interface - Video devices") Suggested-by: Linus Torvalds Acked-by: David S. Miller Cc: Randy Dunlap Signed-off-by: Guenter Roeck [ Added explanation of C scope rules - Linus ] Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/omap4iss/iss_video.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c index 9e2f0421a01e..0bf6643cca07 100644 --- a/drivers/staging/media/omap4iss/iss_video.c +++ b/drivers/staging/media/omap4iss/iss_video.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include @@ -24,6 +23,8 @@ #include #include +#include + #include "iss_video.h" #include "iss.h" -- GitLab From 637de2c016786aa00697202aefc1d3e69dd05c55 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 24 Jul 2018 02:43:52 -0700 Subject: [PATCH 1077/1291] bnx2x: Fix invalid memory access in rss hash config path. [ Upstream commit ae2dcb28c24794a87e424a726a1cf1a61980f52d ] Rx hash/filter table configuration uses rss_conf_obj to configure filters in the hardware. This object is initialized only when the interface is brought up. This patch adds driver changes to configure rss params only when the device is in opened state. In port disabled case, the config will be cached in the driver structure which will be applied in the successive load path. Please consider applying it to 'net' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 1e33abde4a3e..3fd1085a093f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3387,14 +3387,18 @@ static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } else if ((info->flow_type == UDP_V6_FLOW) && (bp->rss_conf_obj.udp_rss_v6 != udp_rss_requested)) { bp->rss_conf_obj.udp_rss_v6 = udp_rss_requested; DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } return 0; @@ -3508,7 +3512,10 @@ static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, bp->rss_conf_obj.ind_table[i] = indir[i] + bp->fp->cl_id; } - return bnx2x_config_rss_eth(bp, false); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_config_rss_eth(bp, false); + + return 0; } /** -- GitLab From f63868841a310d29f722015ff6bfef79ec61a3cc Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Tue, 24 Jul 2018 01:31:07 +0200 Subject: [PATCH 1078/1291] qmi_wwan: fix interface number for DW5821e production firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f25e1392fdb556290957142ac2da33a02cbff403 ] The original mapping for the DW5821e was done using a development version of the firmware. Confirmed with the vendor that the final USB layout ends up exposing the QMI control/data ports in USB config #1, interface #0, not in interface #1 (which is now a HID interface). T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Fixes: e7e197edd09c25 ("qmi_wwan: add support for the Dell Wireless 5821e module") Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6d3811c869fd..31684f3382f6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1245,7 +1245,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ - {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ -- GitLab From 354e35beb0c582af5cb093e8d1d94e534a48eb4e Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Tue, 24 Jul 2018 10:09:53 +0530 Subject: [PATCH 1079/1291] net: axienet: Fix double deregister of mdio [ Upstream commit 03bc7cab7d7218088412a75e141696a89059ab00 ] If the registration fails then mdio_unregister is called. However at unbind the unregister ia attempted again resulting in the below crash [ 73.544038] kernel BUG at drivers/net/phy/mdio_bus.c:415! [ 73.549362] Internal error: Oops - BUG: 0 [#1] SMP [ 73.554127] Modules linked in: [ 73.557168] CPU: 0 PID: 2249 Comm: sh Not tainted 4.14.0 #183 [ 73.562895] Hardware name: xlnx,zynqmp (DT) [ 73.567062] task: ffffffc879e41180 task.stack: ffffff800cbe0000 [ 73.572973] PC is at mdiobus_unregister+0x84/0x88 [ 73.577656] LR is at axienet_mdio_teardown+0x18/0x30 [ 73.582601] pc : [] lr : [] pstate: 20000145 [ 73.589981] sp : ffffff800cbe3c30 [ 73.593277] x29: ffffff800cbe3c30 x28: ffffffc879e41180 [ 73.598573] x27: ffffff8008a21000 x26: 0000000000000040 [ 73.603868] x25: 0000000000000124 x24: ffffffc879efe920 [ 73.609164] x23: 0000000000000060 x22: ffffffc879e02000 [ 73.614459] x21: ffffffc879e02800 x20: ffffffc87b0b8870 [ 73.619754] x19: ffffffc879e02800 x18: 000000000000025d [ 73.625050] x17: 0000007f9a719ad0 x16: ffffff8008195bd8 [ 73.630345] x15: 0000007f9a6b3d00 x14: 0000000000000010 [ 73.635640] x13: 74656e7265687465 x12: 0000000000000030 [ 73.640935] x11: 0000000000000030 x10: 0101010101010101 [ 73.646231] x9 : 241f394f42533300 x8 : ffffffc8799f6e98 [ 73.651526] x7 : ffffffc8799f6f18 x6 : ffffffc87b0ba318 [ 73.656822] x5 : ffffffc87b0ba498 x4 : 0000000000000000 [ 73.662117] x3 : 0000000000000000 x2 : 0000000000000008 [ 73.667412] x1 : 0000000000000004 x0 : ffffffc8799f4000 [ 73.672708] Process sh (pid: 2249, stack limit = 0xffffff800cbe0000) Fix the same by making the bus NULL on unregister. Signed-off-by: Shubhrajyoti Datta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 16c3bfbe1992..757a3b37ae8a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -218,6 +218,7 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) ret = of_mdiobus_register(bus, np1); if (ret) { mdiobus_free(bus); + lp->mii_bus = NULL; return ret; } return 0; -- GitLab From b3da5df23900dd092895e8d2b897d4ddc641cc8b Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Fri, 20 Jul 2018 10:39:13 +0200 Subject: [PATCH 1080/1291] locking/rtmutex: Allow specifying a subclass for nested locking [ Upstream commit 62cedf3e60af03e47849fe2bd6a03ec179422a8a ] Needed for annotating rt_mutex locks. Tested-by: John Sperbeck Signed-off-by: Peter Rosin Signed-off-by: Peter Zijlstra (Intel) Cc: Davidlohr Bueso Cc: Deepa Dinamani Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Chang Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Will Deacon Cc: Wolfram Sang Link: http://lkml.kernel.org/r/20180720083914.1950-2-peda@axentia.se Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/rtmutex.h | 7 +++++++ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1b92a28dd672..6fd615a0eea9 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -106,7 +106,14 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock) extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); extern void rt_mutex_destroy(struct rt_mutex *lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); +#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) +#else extern void rt_mutex_lock(struct rt_mutex *lock); +#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) +#endif + extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 65cc0cb984e6..4ad35718f123 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1466,6 +1466,29 @@ rt_mutex_fastunlock(struct rt_mutex *lock, rt_mutex_postunlock(&wake_q); } +static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) +{ + might_sleep(); + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/** + * rt_mutex_lock_nested - lock a rt_mutex + * + * @lock: the rt_mutex to be locked + * @subclass: the lockdep subclass + */ +void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) +{ + __rt_mutex_lock(lock, subclass); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +#endif + +#ifndef CONFIG_DEBUG_LOCK_ALLOC /** * rt_mutex_lock - lock a rt_mutex * @@ -1473,12 +1496,10 @@ rt_mutex_fastunlock(struct rt_mutex *lock, */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - might_sleep(); - - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); + __rt_mutex_lock(lock, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock); +#endif /** * rt_mutex_lock_interruptible - lock a rt_mutex interruptible -- GitLab From 0ba83f87c3f1a16dab4a55ca52c57e59486c9917 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Fri, 20 Jul 2018 10:39:14 +0200 Subject: [PATCH 1081/1291] i2c/mux, locking/core: Annotate the nested rt_mutex usage [ Upstream commit 7b94ea50514d1a0dc94f02723b603c27bc0ea597 ] If an i2c topology has instances of nested muxes, then a lockdep splat is produced when when i2c_parent_lock_bus() is called. Here is an example: ============================================ WARNING: possible recursive locking detected -------------------------------------------- insmod/68159 is trying to acquire lock: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] but task is already holding lock: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(i2c_register_adapter#2); lock(i2c_register_adapter#2); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by insmod/68159: #0: (i2c_register_adapter#2){+.+.}, at: i2c_parent_lock_bus+0x32/0x50 [i2c_mux] stack backtrace: CPU: 13 PID: 68159 Comm: insmod Tainted: G O Call Trace: dump_stack+0x67/0x98 __lock_acquire+0x162e/0x1780 lock_acquire+0xba/0x200 rt_mutex_lock+0x44/0x60 i2c_parent_lock_bus+0x32/0x50 [i2c_mux] i2c_parent_lock_bus+0x3e/0x50 [i2c_mux] i2c_smbus_xfer+0xf0/0x700 i2c_smbus_read_byte+0x42/0x70 my2c_init+0xa2/0x1000 [my2c] do_one_initcall+0x51/0x192 do_init_module+0x62/0x216 load_module+0x20f9/0x2b50 SYSC_init_module+0x19a/0x1c0 SyS_init_module+0xe/0x10 do_syscall_64+0x6c/0x1a0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Reported-by: John Sperbeck Tested-by: John Sperbeck Signed-off-by: Peter Rosin Signed-off-by: Peter Zijlstra (Intel) Cc: Davidlohr Bueso Cc: Deepa Dinamani Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Chang Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Will Deacon Cc: Wolfram Sang Link: http://lkml.kernel.org/r/20180720083914.1950-3-peda@axentia.se Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-base.c | 2 +- drivers/i2c/i2c-mux.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 6f2fe63e8f5a..7b961c9c62ef 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -638,7 +638,7 @@ static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr) static void i2c_adapter_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { - rt_mutex_lock(&adapter->bus_lock); + rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter)); } /** diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 9669ca4937b8..7ba31f6bf148 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -144,7 +144,7 @@ static void i2c_mux_lock_bus(struct i2c_adapter *adapter, unsigned int flags) struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); if (!(flags & I2C_LOCK_ROOT_ADAPTER)) return; i2c_lock_bus(parent, flags); @@ -181,7 +181,7 @@ static void i2c_parent_lock_bus(struct i2c_adapter *adapter, struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); i2c_lock_bus(parent, flags); } -- GitLab From d35aab9df15a514b3efc69f92011625cd934df95 Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Wed, 18 Jul 2018 08:46:55 +0800 Subject: [PATCH 1082/1291] sched/rt: Restore rt_runtime after disabling RT_RUNTIME_SHARE [ Upstream commit f3d133ee0a17d5694c6f21873eec9863e11fa423 ] NO_RT_RUNTIME_SHARE feature is used to prevent a CPU borrow enough runtime with a spin-rt-task. However, if RT_RUNTIME_SHARE feature is enabled and rt_rq has borrowd enough rt_runtime at the beginning, rt_runtime can't be restored to its initial bandwidth rt_runtime after we disable RT_RUNTIME_SHARE. E.g. on my PC with 4 cores, procedure to reproduce: 1) Make sure RT_RUNTIME_SHARE is enabled cat /sys/kernel/debug/sched_features GENTLE_FAIR_SLEEPERS START_DEBIT NO_NEXT_BUDDY LAST_BUDDY CACHE_HOT_BUDDY WAKEUP_PREEMPTION NO_HRTICK NO_DOUBLE_TICK LB_BIAS NONTASK_CAPACITY TTWU_QUEUE NO_SIS_AVG_CPU SIS_PROP NO_WARN_DOUBLE_CLOCK RT_PUSH_IPI RT_RUNTIME_SHARE NO_LB_MIN ATTACH_AGE_LOAD WA_IDLE WA_WEIGHT WA_BIAS 2) Start a spin-rt-task ./loop_rr & 3) set affinity to the last cpu taskset -p 8 $pid_of_loop_rr 4) Observe that last cpu have borrowed enough runtime. cat /proc/sched_debug | grep rt_runtime .rt_runtime : 950.000000 .rt_runtime : 900.000000 .rt_runtime : 950.000000 .rt_runtime : 1000.000000 5) Disable RT_RUNTIME_SHARE echo NO_RT_RUNTIME_SHARE > /sys/kernel/debug/sched_features 6) Observe that rt_runtime can not been restored cat /proc/sched_debug | grep rt_runtime .rt_runtime : 950.000000 .rt_runtime : 900.000000 .rt_runtime : 950.000000 .rt_runtime : 1000.000000 This patch help to restore rt_runtime after we disable RT_RUNTIME_SHARE. Signed-off-by: Hailong Liu Signed-off-by: Jiang Biao Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: zhong.weidong@zte.com.cn Link: http://lkml.kernel.org/r/1531874815-39357-1-git-send-email-liu.hailong6@zte.com.cn Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index bba2217652ff..cb9a5b8532fa 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -837,6 +837,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) * can be time-consuming. Try to avoid it when possible. */ raw_spin_lock(&rt_rq->rt_runtime_lock); + if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF) + rt_rq->rt_runtime = rt_b->rt_runtime; skip = !rt_rq->rt_time && !rt_rq->rt_nr_running; raw_spin_unlock(&rt_rq->rt_runtime_lock); if (skip) -- GitLab From 165335d4f18e0f730009452f52086c362c250dd2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jul 2018 16:08:27 -0700 Subject: [PATCH 1083/1291] x86/boot: Fix if_changed build flip/flop bug [ Upstream commit 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a ] Dirk Gouders reported that two consecutive "make" invocations on an already compiled tree will show alternating behaviors: $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h DATAREL arch/x86/boot/compressed/vmlinux Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h LD arch/x86/boot/compressed/vmlinux ZOFFSET arch/x86/boot/zoffset.h AS arch/x86/boot/header.o LD arch/x86/boot/setup.elf OBJCOPY arch/x86/boot/setup.bin OBJCOPY arch/x86/boot/vmlinux.bin BUILD arch/x86/boot/bzImage Setup is 15644 bytes (padded to 15872 bytes). System is 6663 kB CRC 3eb90f40 Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules He bisected it back to: commit 98f78525371b ("x86/boot: Refuse to build with data relocations") The root cause was the use of the "if_changed" kbuild function multiple times for the same target. It was designed to only be used once per target, otherwise it will effectively always trigger, flipping back and forth between the two commands getting recorded by "if_changed". Instead, this patch merges the two commands into a single function to get stable build artifacts (i.e. .vmlinux.cmd), and a single build behavior. Bisected-and-Reported-by: Dirk Gouders Fix-Suggested-by: Masahiro Yamada Signed-off-by: Kees Cook Reviewed-by: Masahiro Yamada Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180724230827.GA37823@beast Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 98018a621f6b..3a250ca2406c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -104,9 +104,13 @@ define cmd_check_data_rel done endef +# We need to run two commands under "if_changed", so merge them into a +# single invocation. +quiet_cmd_check-and-link-vmlinux = LD $@ + cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld) + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE - $(call if_changed,check_data_rel) - $(call if_changed,ld) + $(call if_changed,check-and-link-vmlinux) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE -- GitLab From 819b476c21386dc2a771b19dab6451cabec628e7 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Wed, 25 Jul 2018 14:31:20 +0100 Subject: [PATCH 1084/1291] fscache: Allow cancelled operations to be enqueued [ Upstream commit d0eb06afe712b7b103b6361f40a9a0c638524669 ] Alter the state-check assertion in fscache_enqueue_operation() to allow cancelled operations to be given processing time so they can be cleaned up. Also fix a debugging statement that was requiring such operations to have an object assigned. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Reported-by: Kiran Kumar Modukuri Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/fscache/operation.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index de67745e1cd7..77946d6f617d 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -66,7 +66,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, + op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -481,7 +482,8 @@ void fscache_put_operation(struct fscache_operation *op) struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + op->object ? op->object->debug_id : 0, + op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); -- GitLab From 4029dd9fc48b7bb3f9a490474321fb7e20890863 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Tue, 18 Jul 2017 16:25:49 -0700 Subject: [PATCH 1085/1291] cachefiles: Fix refcounting bug in backing-file read monitoring [ Upstream commit 934140ab028713a61de8bca58c05332416d037d1 ] cachefiles_read_waiter() has the right to access a 'monitor' object by virtue of being called under the waitqueue lock for one of the pages in its purview. However, it has no ref on that monitor object or on the associated operation. What it is allowed to do is to move the monitor object to the operation's to_do list, but once it drops the work_lock, it's actually no longer permitted to access that object. However, it is trying to enqueue the retrieval operation for processing - but it can only do this via a pointer in the monitor object, something it shouldn't be doing. If it doesn't enqueue the operation, the operation may not get processed. If the order is flipped so that the enqueue is first, then it's possible for the work processor to look at the to_do list before the monitor is enqueued upon it. Fix this by getting a ref on the operation so that we can trust that it will still be there once we've added the monitor to the to_do list and dropped the work_lock. The op can then be enqueued after the lock is dropped. The bug can manifest in one of a couple of ways. The first manifestation looks like: FS-Cache: FS-Cache: Assertion failed FS-Cache: 6 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:494! RIP: 0010:fscache_put_operation+0x1e3/0x1f0 ... fscache_op_work_func+0x26/0x50 process_one_work+0x131/0x290 worker_thread+0x45/0x360 kthread+0xf8/0x130 ? create_worker+0x190/0x190 ? kthread_cancel_work_sync+0x10/0x10 ret_from_fork+0x1f/0x30 This is due to the operation being in the DEAD state (6) rather than INITIALISED, COMPLETE or CANCELLED (5) because it's already passed through fscache_put_operation(). The bug can also manifest like the following: kernel BUG at fs/fscache/operation.c:69! ... [exception RIP: fscache_enqueue_operation+246] ... #7 [ffff883fff083c10] fscache_enqueue_operation at ffffffffa0b793c6 #8 [ffff883fff083c28] cachefiles_read_waiter at ffffffffa0b15a48 #9 [ffff883fff083c48] __wake_up_common at ffffffff810af028 I'm not entirely certain as to which is line 69 in Lei's kernel, so I'm not entirely clear which assertion failed. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Reported-by: Lei Xue Reported-by: Vegard Nossum Reported-by: Anthony DeRobertis Reported-by: NeilBrown Reported-by: Daniel Axtens Reported-by: Kiran Kumar Modukuri Signed-off-by: David Howells Reviewed-by: Daniel Axtens Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/rdwr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 18d7aa61ef0f..199eb396a1bb 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_one_read *monitor = container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; + struct fscache_retrieval *op = monitor->op; struct wait_bit_key *key = _key; struct page *page = wait->private; @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); + ASSERT(op); - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); + /* We need to temporarily bump the usage count as we don't own a ref + * here otherwise cachefiles_read_copier() may free the op between the + * monitor being enqueued on the op->to_do list and the op getting + * enqueued on the work queue. + */ + fscache_get_retrieval(op); + object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); + list_add_tail(&monitor->op_link, &op->to_do); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(monitor->op); + fscache_enqueue_retrieval(op); + fscache_put_retrieval(op); return 0; } -- GitLab From 2c69b0300458f6b7b614c64607f8a2df8e652d22 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Thu, 21 Jun 2018 13:25:53 -0700 Subject: [PATCH 1086/1291] cachefiles: Wait rather than BUG'ing on "Unexpected object collision" [ Upstream commit c2412ac45a8f8f1cd582723c1a139608694d410d ] If we meet a conflicting object that is marked FSCACHE_OBJECT_IS_LIVE in the active object tree, we have been emitting a BUG after logging information about it and the new object. Instead, we should wait for the CACHEFILES_OBJECT_ACTIVE flag to be cleared on the old object (or return an error). The ACTIVE flag should be cleared after it has been removed from the active object tree. A timeout of 60s is used in the wait, so we shouldn't be able to get stuck there. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Signed-off-by: Kiran Kumar Modukuri Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 3978b324cbca..5f2f67d220fa 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -195,7 +195,6 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, pr_err("\n"); pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); - BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); -- GitLab From d00c34f8e20516f553a6920921bff239cd479c03 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 14 Jul 2018 01:28:44 +0900 Subject: [PATCH 1087/1291] selftests/ftrace: Add snapshot and tracing_on test case [ Upstream commit 82f4f3e69c5c29bce940dd87a2c0f16c51d48d17 ] Add a testcase for checking snapshot and tracing_on relationship. This ensures that the snapshotting doesn't affect current tracing on/off settings. Link: http://lkml.kernel.org/r/153149932412.11274.15289227592627901488.stgit@devbox Cc: Tom Zanussi Cc: Hiraku Toyooka Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../ftrace/test.d/00basic/snapshot.tc | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc new file mode 100644 index 000000000000..3b1f45e13a2e --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc @@ -0,0 +1,28 @@ +#!/bin/sh +# description: Snapshot and tracing setting +# flags: instance + +[ ! -f snapshot ] && exit_unsupported + +echo "Set tracing off" +echo 0 > tracing_on + +echo "Allocate and take a snapshot" +echo 1 > snapshot + +# Since trace buffer is empty, snapshot is also empty, but allocated +grep -q "Snapshot is allocated" snapshot + +echo "Ensure keep tracing off" +test `cat tracing_on` -eq 0 + +echo "Set tracing on" +echo 1 > tracing_on + +echo "Take a snapshot again" +echo 1 > snapshot + +echo "Ensure keep tracing on" +test `cat tracing_on` -eq 1 + +exit 0 -- GitLab From 2dd2f772257054867543c8051a174b3e1d879c2f Mon Sep 17 00:00:00 2001 From: dann frazier Date: Mon, 23 Jul 2018 16:55:40 -0600 Subject: [PATCH 1088/1291] hinic: Link the logical network device to the pci device in sysfs [ Upstream commit 7856e8616273098dc6c09a6e084afd98a283ff0d ] Otherwise interfaces get exposed under /sys/devices/virtual, which doesn't give udev the context it needs for PCI-based predictable interface names. Signed-off-by: dann frazier Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index eb53bd93065e..a696b5b2d40e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -981,6 +981,7 @@ static int nic_dev_init(struct pci_dev *pdev) hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS, nic_dev, link_status_event_handler); + SET_NETDEV_DEV(netdev, &pdev->dev); err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); -- GitLab From 92c159863d8afefa9ca25a180feba6bff11c2be5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 26 Jul 2018 16:37:19 -0700 Subject: [PATCH 1089/1291] ipc/sem.c: prevent queue.status tearing in semop [ Upstream commit f075faa300acc4f6301e348acde0a4580ed5f77c ] In order for load/store tearing prevention to work, _all_ accesses to the variable in question need to be done around READ and WRITE_ONCE() macros. Ensure everyone does so for q->status variable for semtimedop(). Link: http://lkml.kernel.org/r/20180717052654.676-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- ipc/sem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/sem.c b/ipc/sem.c index b2698ebdcb31..d6dd2dc9ddad 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2041,7 +2041,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, } do { - queue.status = -EINTR; + WRITE_ONCE(queue.status, -EINTR); queue.sleeper = current; __set_current_state(TASK_INTERRUPTIBLE); -- GitLab From a73b6c4c2601dbe1cef92d7adeaf267fc50dcd4e Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 26 Jul 2018 16:37:42 -0700 Subject: [PATCH 1090/1291] zswap: re-check zswap_is_full() after do zswap_shrink() [ Upstream commit 16e536ef47f567289a5699abee9ff7bb304bc12d ] /sys/../zswap/stored_pages keeps rising in a zswap test with "zswap.max_pool_percent=0" parameter. But it should not compress or store pages any more since there is no space in the compressed pool. Reproduce steps: 1. Boot kernel with "zswap.enabled=1" 2. Set the max_pool_percent to 0 # echo 0 > /sys/module/zswap/parameters/max_pool_percent 3. Do memory stress test to see if some pages have been compressed # stress --vm 1 --vm-bytes $mem_available"M" --timeout 60s 4. Watching the 'stored_pages' number increasing or not The root cause is: When zswap_max_pool_percent is set to 0 via kernel parameter, zswap_is_full() will always return true due to zswap_shrink(). But if the shinking is able to reclain a page successfully the code then proceeds to compressing/storing another page, so the value of stored_pages will keep changing. To solve the issue, this patch adds a zswap_is_full() check again after zswap_shrink() to make sure it's now under the max_pool_percent, and to not compress/store if we reached the limit. Link: http://lkml.kernel.org/r/20180530103936.17812-1-liwang@redhat.com Signed-off-by: Li Wang Acked-by: Dan Streetman Cc: Seth Jennings Cc: Huang Ying Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index 597008a44f70..ebb0bc88c5f7 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -989,6 +989,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ret = -ENOMEM; goto reject; } + + /* A second zswap_is_full() check after + * zswap_shrink() to make sure it's now + * under the max_pool_percent + */ + if (zswap_is_full()) { + ret = -ENOMEM; + goto reject; + } } /* allocate entry */ -- GitLab From 9339ea7c92fc47cc6389375ca8e1e064d316fb64 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 27 Jul 2018 07:50:53 -0400 Subject: [PATCH 1091/1291] tools/power turbostat: Read extended processor family from CPUID [ Upstream commit 5aa3d1a20a233d4a5f1ec3d62da3f19d9afea682 ] This fixes the reported family on modern AMD processors (e.g. Ryzen, which is family 0x17). Previously these processors all showed up as family 0xf. See the document https://support.amd.com/TechDocs/56255_OSRR.pdf section CPUID_Fn00000001_EAX for how to calculate the family from the BaseFamily and ExtFamily values. This matches the code in arch/x86/lib/cpu.c Signed-off-by: Calvin Walton Signed-off-by: Len Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/turbostat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b28de3ad3907..1512086c8cb8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4029,7 +4029,9 @@ void process_cpuid() family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; - if (family == 6 || family == 0xf) + if (family == 0xf) + family += (fms >> 20) & 0xff; + if (family >= 6) model += ((fms >> 16) & 0xf) << 4; if (!quiet) { -- GitLab From 49c1fba34589c96624c738f11c8278a590c9ef99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 27 Jul 2018 13:13:39 +0200 Subject: [PATCH 1092/1291] Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d5ea019f8a381f88545bb26993b62ec24a2796b7 ] This reverts commit 2a027b47dba6 ("MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum"). Enabling ExternalSync caused a regression for BCM4718A1 (used e.g. in Netgear E3000 and ASUS RT-N16): it simply hangs during PCIe initialization. It's likely that BCM4717A1 is also affected. I didn't notice that earlier as the only BCM47XX devices with PCIe I own are: 1) BCM4706 with 2 x 14e4:4331 2) BCM4706 with 14e4:4360 and 14e4:4331 it appears that BCM4706 is unaffected. While BCM5300X-ES300-RDS.pdf seems to document that erratum and its workarounds (according to quotes provided by Tokunori) it seems not even Broadcom follows them. According to the provided info Broadcom should define CONF7_ES in their SDK's mipsinc.h and implement workaround in the si_mips_init(). Checking both didn't reveal such code. It *could* mean Broadcom also had some problems with the given workaround. Signed-off-by: Rafał Miłecki Signed-off-by: Paul Burton Reported-by: Michael Marley Patchwork: https://patchwork.linux-mips.org/patch/20032/ URL: https://bugs.openwrt.org/index.php?do=details&task_id=1688 Cc: Tokunori Ikegami Cc: Hauke Mehrtens Cc: Chris Packham Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/bcm47xx/setup.c | 6 ------ arch/mips/include/asm/mipsregs.h | 3 --- 2 files changed, 9 deletions(-) diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 8c9cbf13d32a..6054d49e608e 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -212,12 +212,6 @@ static int __init bcm47xx_cpu_fixes(void) */ if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706) cpu_wait = NULL; - - /* - * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail" - * Enable ExternalSync for sync instruction to take effect - */ - set_c0_config7(MIPS_CONF7_ES); break; #endif } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 60c787d943b0..a6810923b3f0 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -680,8 +680,6 @@ #define MIPS_CONF7_WII (_ULCAST_(1) << 31) #define MIPS_CONF7_RPS (_ULCAST_(1) << 2) -/* ExternalSync */ -#define MIPS_CONF7_ES (_ULCAST_(1) << 8) #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) @@ -2747,7 +2745,6 @@ __BUILD_SET_C0(status) __BUILD_SET_C0(cause) __BUILD_SET_C0(config) __BUILD_SET_C0(config5) -__BUILD_SET_C0(config7) __BUILD_SET_C0(intcontrol) __BUILD_SET_C0(intctl) __BUILD_SET_C0(srsmap) -- GitLab From 331c36cd01d8c11dbde3abc8e31b7ec8c67d1c02 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:43 +0300 Subject: [PATCH 1093/1291] ARC: dma [non-IOC] setup SMP_CACHE_BYTES and cache_line_size [ Upstream commit eb2777397fd83a4a7eaa26984d09d3babb845d2a ] As for today we don't setup SMP_CACHE_BYTES and cache_line_size for ARC, so they are set to L1_CACHE_BYTES by default. L1 line length (L1_CACHE_BYTES) might be easily smaller than L2 line (which is usually the case BTW). This breaks code. For example this breaks ethernet infrastructure on HSDK/AXS103 boards with IOC disabled, involving manual cache flushes Functions which alloc and manage sk_buff packet data area rely on SMP_CACHE_BYTES define. In the result we can share last L2 cache line in sk_buff linear packet data area between DMA buffer and some useful data in other structure. So we can lose this data when we invalidate DMA buffer. sk_buff linear packet data area | | | skb->end skb->tail V | | V V ----------------------------------------------. packet data | | ----------------------------------------------. ---------------------.--------------------------------------------------. SLC line | SLC (L2 cache) line (128B) | ---------------------.--------------------------------------------------. ^ ^ | | These cache lines will be invalidated when we invalidate skb linear packet data area before DMA transaction starting. This leads to issues painful to debug as it reproduces only if (sk_buff->end - sk_buff->tail) < SLC_LINE_SIZE and if we have some useful data right after sk_buff->end. Fix that by hardcode SMP_CACHE_BYTES to max line length we may have. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/Kconfig | 3 +++ arch/arc/include/asm/cache.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5c8caf85c350..8ff066090680 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -45,6 +45,9 @@ config ARC select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + config MIGHT_HAVE_PCI bool diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 8486f328cc5d..ff7d3232764a 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -48,7 +48,9 @@ }) /* Largest line length for either L1 or L2 is 128 bytes */ -#define ARCH_DMA_MINALIGN 128 +#define SMP_CACHE_BYTES 128 +#define cache_line_size() SMP_CACHE_BYTES +#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -- GitLab From bc928fdf5d1ebfd19feec4ee90baf4b965d26de8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 29 Jul 2018 00:28:31 +0900 Subject: [PATCH 1094/1291] bpf: use GFP_ATOMIC instead of GFP_KERNEL in bpf_parse_prog() [ Upstream commit 71eb5255f55bdb484d35ff7c9a1803f453dfbf82 ] bpf_parse_prog() is protected by rcu_read_lock(). so that GFP_KERNEL is not allowed in the bpf_parse_prog(). [51015.579396] ============================= [51015.579418] WARNING: suspicious RCU usage [51015.579444] 4.18.0-rc6+ #208 Not tainted [51015.579464] ----------------------------- [51015.579488] ./include/linux/rcupdate.h:303 Illegal context switch in RCU read-side critical section! [51015.579510] other info that might help us debug this: [51015.579532] rcu_scheduler_active = 2, debug_locks = 1 [51015.579556] 2 locks held by ip/1861: [51015.579577] #0: 00000000a8c12fd1 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x2e0/0x910 [51015.579711] #1: 00000000bf815f8e (rcu_read_lock){....}, at: lwtunnel_build_state+0x96/0x390 [51015.579842] stack backtrace: [51015.579869] CPU: 0 PID: 1861 Comm: ip Not tainted 4.18.0-rc6+ #208 [51015.579891] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [51015.579911] Call Trace: [51015.579950] dump_stack+0x74/0xbb [51015.580000] ___might_sleep+0x16b/0x3a0 [51015.580047] __kmalloc_track_caller+0x220/0x380 [51015.580077] kmemdup+0x1c/0x40 [51015.580077] bpf_parse_prog+0x10e/0x230 [51015.580164] ? kasan_kmalloc+0xa0/0xd0 [51015.580164] ? bpf_destroy_state+0x30/0x30 [51015.580164] ? bpf_build_state+0xe2/0x3e0 [51015.580164] bpf_build_state+0x1bb/0x3e0 [51015.580164] ? bpf_parse_prog+0x230/0x230 [51015.580164] ? lock_is_held_type+0x123/0x1a0 [51015.580164] lwtunnel_build_state+0x1aa/0x390 [51015.580164] fib_create_info+0x1579/0x33d0 [51015.580164] ? sched_clock_local+0xe2/0x150 [51015.580164] ? fib_info_update_nh_saddr+0x1f0/0x1f0 [51015.580164] ? sched_clock_local+0xe2/0x150 [51015.580164] fib_table_insert+0x201/0x1990 [51015.580164] ? lock_downgrade+0x610/0x610 [51015.580164] ? fib_table_lookup+0x1920/0x1920 [51015.580164] ? lwtunnel_valid_encap_type.part.6+0xcb/0x3a0 [51015.580164] ? rtm_to_fib_config+0x637/0xbd0 [51015.580164] inet_rtm_newroute+0xed/0x1b0 [51015.580164] ? rtm_to_fib_config+0xbd0/0xbd0 [51015.580164] rtnetlink_rcv_msg+0x331/0x910 [ ... ] Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/lwt_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1307731ddfe4..832d69649cb6 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) return -EINVAL; - prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); + prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC); if (!prog->name) return -ENOMEM; -- GitLab From d4f96c0515fc7f448d9f9a559f76f4364d2d3fd3 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Fri, 27 Jul 2018 20:56:52 -0700 Subject: [PATCH 1095/1291] nfp: flower: fix port metadata conversion bug [ Upstream commit ee614c871014045b45fae149b7245fc22a0bbdd8 ] Function nfp_flower_repr_get_type_and_port expects an enum nfp_repr_type return value but, if the repr type is unknown, returns a value of type enum nfp_flower_cmsg_port_type. This means that if FW encodes the port ID in a way the driver does not understand instead of dropping the frame driver may attribute it to a physical port (uplink) provided the port number is less than physical port count. Fix this and ensure a net_device of NULL is returned if the repr can not be determined. Fixes: 1025351a88a4 ("nfp: add flower app") Signed-off-by: John Hurley Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 91fe03617106..72496060e332 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -79,7 +79,7 @@ nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) return NFP_REPR_TYPE_VF; } - return NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC; + return __NFP_REPR_TYPE_MAX; } static struct net_device * @@ -90,6 +90,8 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id) u8 port = 0; repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port); + if (repr_type > NFP_REPR_TYPE_MAX) + return NULL; reprs = rcu_dereference(app->reprs[repr_type]); if (!reprs) -- GitLab From 2312e6a802b3a5636c487fe7222757f7a573995d Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Fri, 27 Jul 2018 11:19:29 -0700 Subject: [PATCH 1096/1291] enic: handle mtu change for vf properly [ Upstream commit ab123fe071c9aa9680ecd62eb080eb26cff4892c ] When driver gets notification for mtu change, driver does not handle it for all RQs. It handles only RQ[0]. Fix is to use enic_change_mtu() interface to change mtu for vf. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 78 +++++++-------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 800edfbd36c1..a3e632e63552 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2007,28 +2007,42 @@ static int enic_stop(struct net_device *netdev) return 0; } +static int _enic_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool running = netif_running(netdev); + int err = 0; + + ASSERT_RTNL(); + if (running) { + err = enic_stop(netdev); + if (err) + return err; + } + + netdev->mtu = new_mtu; + + if (running) { + err = enic_open(netdev); + if (err) + return err; + } + + return 0; +} + static int enic_change_mtu(struct net_device *netdev, int new_mtu) { struct enic *enic = netdev_priv(netdev); - int running = netif_running(netdev); if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (running) - enic_stop(netdev); - - netdev->mtu = new_mtu; - if (netdev->mtu > enic->port_mtu) netdev_warn(netdev, - "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + "interface MTU (%d) set higher than port MTU (%d)\n", + netdev->mtu, enic->port_mtu); - if (running) - enic_open(netdev); - - return 0; + return _enic_change_mtu(netdev, new_mtu); } static void enic_change_mtu_work(struct work_struct *work) @@ -2036,47 +2050,9 @@ static void enic_change_mtu_work(struct work_struct *work) struct enic *enic = container_of(work, struct enic, change_mtu_work); struct net_device *netdev = enic->netdev; int new_mtu = vnic_dev_mtu(enic->vdev); - int err; - unsigned int i; - - new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu)); rtnl_lock(); - - /* Stop RQ */ - del_timer_sync(&enic->notify_timer); - - for (i = 0; i < enic->rq_count; i++) - napi_disable(&enic->napi[i]); - - vnic_intr_mask(&enic->intr[0]); - enic_synchronize_irqs(enic); - err = vnic_rq_disable(&enic->rq[0]); - if (err) { - rtnl_unlock(); - netdev_err(netdev, "Unable to disable RQ.\n"); - return; - } - vnic_rq_clean(&enic->rq[0], enic_free_rq_buf); - vnic_cq_clean(&enic->cq[0]); - vnic_intr_clean(&enic->intr[0]); - - /* Fill RQ with new_mtu-sized buffers */ - netdev->mtu = new_mtu; - vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); - /* Need at least one buffer on ring to get going */ - if (vnic_rq_desc_used(&enic->rq[0]) == 0) { - rtnl_unlock(); - netdev_err(netdev, "Unable to alloc receive buffers.\n"); - return; - } - - /* Start RQ */ - vnic_rq_enable(&enic->rq[0]); - napi_enable(&enic->napi[0]); - vnic_intr_unmask(&enic->intr[0]); - enic_notify_timer_start(enic); - + (void)_enic_change_mtu(netdev, new_mtu); rtnl_unlock(); netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); -- GitLab From d267258ee192f86d176af910eb7bb4acc3796542 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sat, 28 Jul 2018 10:54:41 +0300 Subject: [PATCH 1097/1291] ARC: [plat-eznps] Add missing struct nps_host_reg_aux_dpc [ Upstream commit 05b466bf846d2e8d2f0baf8dfd81a42cc933e237 ] Fixing compilation issue caused by missing struct nps_host_reg_aux_dpc definition. Fixes: 3f9cd874dcc87 ("ARC: [plat-eznps] avoid toggling of DPC register") Reported-by: Randy Dunlap Signed-off-by: Ofer Levi Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/plat-eznps/include/plat/ctop.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 0c7d11022d0f..bd34b96bc591 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -143,6 +143,15 @@ struct nps_host_reg_gim_p_int_dst { }; /* AUX registers definition */ +struct nps_host_reg_aux_dpc { + union { + struct { + u32 ien:1, men:1, hen:1, reserved:29; + }; + u32 value; + }; +}; + struct nps_host_reg_aux_udmc { union { struct { -- GitLab From 79f9c523ca43ecbce364f0f3b08f48c800c0f367 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Jul 2018 11:10:51 -0700 Subject: [PATCH 1098/1291] arc: [plat-eznps] fix data type errors in platform headers [ Upstream commit b1f32ce1c3d2c11959b7e6a2c58dc5197c581966 ] Add to fix build errors. Both ctop.h and use u32 types and cause many errors. Examples: ../include/soc/nps/common.h:71:4: error: unknown type name 'u32' u32 __reserved:20, cluster:4, core:4, thread:4; ../include/soc/nps/common.h:76:3: error: unknown type name 'u32' u32 value; ../include/soc/nps/common.h:124:4: error: unknown type name 'u32' u32 base:8, cl_x:4, cl_y:4, ../include/soc/nps/common.h:127:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:83:4: error: unknown type name 'u32' u32 gen:1, gdis:1, clk_gate_dis:1, asb:1, ../arch/arc/plat-eznps/include/plat/ctop.h:86:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:93:4: error: unknown type name 'u32' u32 csa:22, dmsid:6, __reserved:3, cs:1; ../arch/arc/plat-eznps/include/plat/ctop.h:95:3: error: unknown type name 'u32' u32 value; Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/plat-eznps/include/plat/ctop.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index bd34b96bc591..4f6a1673b3a6 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -21,6 +21,7 @@ #error "Incorrect ctop.h include" #endif +#include #include /* core auxiliary registers */ -- GitLab From aca05b1741d31b7528be3821c93bdc3296125b70 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: [PATCH 1099/1291] arc: [plat-eznps] fix printk warning in arc/plat-eznps/mtm.c [ Upstream commit 9e2ea405543d9ddfe05b351f1679e53bd9c11f80 ] Fix printk format warning in arch/arc/plat-eznps/mtm.c: In file included from ../include/linux/printk.h:7, from ../include/linux/kernel.h:14, from ../include/linux/list.h:9, from ../include/linux/smp.h:12, from ../arch/arc/plat-eznps/mtm.c:17: ../arch/arc/plat-eznps/mtm.c: In function 'set_mtm_hs_ctr': ../include/linux/kern_levels.h:5:18: warning: format '%d' expects argument of type 'int', but argument 2 has type 'long int' [-Wformat=] #define KERN_SOH "\001" /* ASCII Start Of Header */ ^~~~~~ ../include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH' #define KERN_ERR KERN_SOH "3" /* error conditions */ ^~~~~~~~ ../include/linux/printk.h:308:9: note: in expansion of macro 'KERN_ERR' printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~ ../arch/arc/plat-eznps/mtm.c:166:3: note: in expansion of macro 'pr_err' pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ^~~~~~ ../arch/arc/plat-eznps/mtm.c:166:40: note: format string is defined here pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ~^ %ld The hs_ctr variable can just be int instead of long, so also change kstrtol() to kstrtoint() and leave the format string as %d. Also add 2 header files since they are used in mtm.c and we prefer not to depend on accidental/indirect #includes. Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/plat-eznps/mtm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c index 2388de3d09ef..ed0077ef666e 100644 --- a/arch/arc/plat-eznps/mtm.c +++ b/arch/arc/plat-eznps/mtm.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include @@ -157,10 +159,10 @@ void mtm_enable_core(unsigned int cpu) /* Verify and set the value of the mtm hs counter */ static int __init set_mtm_hs_ctr(char *ctr_str) { - long hs_ctr; + int hs_ctr; int ret; - ret = kstrtol(ctr_str, 0, &hs_ctr); + ret = kstrtoint(ctr_str, 0, &hs_ctr); if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", -- GitLab From 391e3007e44741900eaf2893b7b6d62eaba3ee4b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: [PATCH 1100/1291] arc: fix build errors in arc/include/asm/delay.h [ Upstream commit 2423665ec53f2a29191b35382075e9834288a975 ] Fix build errors in arch/arc/'s delay.h: - add "extern unsigned long loops_per_jiffy;" - add for "u64" In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~ In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/delay.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index d5da2115d78a..03d6bb0f4e13 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -17,8 +17,11 @@ #ifndef __ASM_ARC_UDELAY_H #define __ASM_ARC_UDELAY_H +#include #include /* HZ */ +extern unsigned long loops_per_jiffy; + static inline void __delay(unsigned long loops) { __asm__ __volatile__( -- GitLab From 24fab572ae7d09bef6939de602d546be554d2d43 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: [PATCH 1101/1291] arc: fix type warnings in arc/mm/cache.c [ Upstream commit ec837d620c750c0d4996a907c8c4f7febe1bbeee ] Fix type warnings in arch/arc/mm/cache.c. ../arch/arc/mm/cache.c: In function 'flush_anon_page': ../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion] __flush_dcache_page((phys_addr_t)page_address(page), page_address(page)); ^~~~~~~~~~~~~~~~~~ ../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *' void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) ~~~~~~~~~~~~~~^~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/mm/cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index eee924dfffa6..d14499500106 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1035,7 +1035,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, unsigned long pfn) { - unsigned int paddr = pfn << PAGE_SHIFT; + phys_addr_t paddr = pfn << PAGE_SHIFT; u_vaddr &= PAGE_MASK; @@ -1055,8 +1055,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long u_vaddr) { /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); } -- GitLab From 7c841ea7f8f153f77c63989ceda2355ad0f3e43b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Jun 2018 10:11:10 -0400 Subject: [PATCH 1102/1291] sparc/time: Add missing __init to init_tick_ops() [ Upstream commit 6f57ed681ed817a4ec444e83f3aa2ad695d5ef34 ] Code that was added to force gcc not to inline any function that isn't explicitly declared as inline uncovered that init_tick_ops() isn't marked as "__init". It is only called by __init functions and more importantly it too calls an __init function which would require it to be __init as well. Link: http://lkml.kernel.org/r/201806060444.hdHcKOBy%fengguang.wu@intel.com Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/time_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3b397081047a..83aaf4888999 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -813,7 +813,7 @@ static void __init get_tick_patch(void) } } -static void init_tick_ops(struct sparc64_tick_ops *ops) +static void __init init_tick_ops(struct sparc64_tick_ops *ops) { unsigned long freq, quotient, tick; -- GitLab From 574a4f3e6173f65c8167e60f4c18041d1053dde3 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 24 Jul 2018 13:53:05 +0200 Subject: [PATCH 1103/1291] sparc: use asm-generic version of msi.h [ Upstream commit 12be1036c536f849ad6f9bba73cffa708aa965c3 ] This is necessary to be able to include when CONFIG_GENERIC_MSI_IRQ_DOMAIN is enabled. Without this, a build with CONFIG_GENERIC_MSI_IRQ_DOMAIN fails with: In file included from drivers//ata/ahci.c:45:0: >> include/linux/msi.h:226:10: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:230:9: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:239:12: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:240:22: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*msi_finish)(msi_alloc_info_t *arg, int retval); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:241:20: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*set_desc)(msi_alloc_info_t *arg, ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:316:18: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:318:29: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int virq, int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 80ddc01f57ac..fcbc0c0aa087 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h +generic-y += msi.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h -- GitLab From dfa5c4bf8c94ea4ba95f933f36eed4ccb49b0a18 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Mon, 30 Jul 2018 09:56:54 -0700 Subject: [PATCH 1104/1291] enic: do not call enic_change_mtu in enic_probe [ Upstream commit cb5c6568867325f9905e80c96531d963bec8e5ea ] In commit ab123fe071c9 ("enic: handle mtu change for vf properly") ASSERT_RTNL() is added to _enic_change_mtu() to prevent it from being called without rtnl held. enic_probe() calls enic_change_mtu() without rtnl held. At this point netdev is not registered yet. Remove call to enic_change_mtu and assign the mtu to netdev->mtu. Fixes: ab123fe071c9 ("enic: handle mtu change for vf properly") Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index a3e632e63552..2bfaf3e118b1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2843,7 +2843,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ enic->port_mtu = enic->config.mtu; - (void)enic_change_mtu(netdev, enic->port_mtu); err = enic_set_mac_addr(netdev, enic->mac_addr); if (err) { @@ -2930,6 +2929,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* MTU range: 68 - 9000 */ netdev->min_mtu = ENIC_MIN_MTU; netdev->max_mtu = ENIC_MAX_MTU; + netdev->mtu = enic->port_mtu; err = register_netdev(netdev); if (err) { -- GitLab From 8babbc02f893cf8b38c0f5b8a5ba32a274fc5a0b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 10:38:43 -0700 Subject: [PATCH 1105/1291] squashfs metadata 2: electric boogaloo [ Upstream commit cdbb65c4c7ead680ebe54f4f0d486e2847a500ea ] Anatoly continues to find issues with fuzzed squashfs images. This time, corrupt, missing, or undersized data for the page filling wasn't checked for, because the squashfs_{copy,read}_cache() functions did the squashfs_copy_data() call without checking the resulting data size. Which could result in the page cache pages being incompletely filled in, and no error indication to the user space reading garbage data. So make a helper function for the "fill in pages" case, because the exact same incomplete sequence existed in two places. [ I should have made a squashfs branch for these things, but I didn't intend to start doing them in the first place. My historical connection through cramfs is why I got into looking at these issues at all, and every time I (continue to) think it's a one-off. Because _this_ time is always the last time. Right? - Linus ] Reported-by: Anatoly Trosinenko Tested-by: Willy Tarreau Cc: Al Viro Cc: Phillip Lougher Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/file.c | 25 ++++++++++++++++++------- fs/squashfs/file_direct.c | 8 +------- fs/squashfs/squashfs.h | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index fcff2e0487fe..cce3060650ae 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -374,13 +374,29 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return squashfs_block_size(size); } +void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +{ + int copied; + void *pageaddr; + + pageaddr = kmap_atomic(page); + copied = squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + copied, 0, PAGE_SIZE - copied); + kunmap_atomic(pageaddr); + + flush_dcache_page(page); + if (copied == avail) + SetPageUptodate(page); + else + SetPageError(page); +} + /* Copy data into page cache */ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - void *pageaddr; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; @@ -406,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(push_page); - SetPageUptodate(push_page); + squashfs_fill_page(push_page, buffer, offset, avail); skip_page: unlock_page(push_page); if (i != page->index) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index cb485d8e0e91..096990254a2e 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -144,7 +144,6 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, @@ -159,12 +158,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, if (page[n] == NULL) continue; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); + squashfs_fill_page(page[n], buffer, offset, avail); unlock_page(page[n]); if (page[n] != target_page) put_page(page[n]); diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..d8d43724cf2a 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -67,6 +67,7 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ +void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -- GitLab From 249778d9459a4ed9f8dada4ca8ccc2ff09407482 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 1 Aug 2018 11:31:52 -0700 Subject: [PATCH 1106/1291] mm: delete historical BUG from zap_pmd_range() [ Upstream commit 53406ed1bcfdabe4b5bc35e6d17946c6f9f563e2 ] Delete the old VM_BUG_ON_VMA() from zap_pmd_range(), which asserted that mmap_sem must be held when splitting an "anonymous" vma there. Whether that's still strictly true nowadays is not entirely clear, but the danger of sometimes crashing on the BUG is now fairly clear. Even with the new stricter rules for anonymous vma marking, the condition it checks for can possible trigger. Commit 44960f2a7b63 ("staging: ashmem: Fix SIGBUS crash when traversing mmaped ashmem pages") is good, and originally I thought it was safe from that VM_BUG_ON_VMA(), because the /dev/ashmem fd exposed to the user is disconnected from the vm_file in the vma, and madvise(,,MADV_REMOVE) insists on VM_SHARED. But after I read John's earlier mail, drawing attention to the vfs_fallocate() in there: I may be wrong, and I don't know if Android has THP in the config anyway, but it looks to me like an unmap_mapping_range() from ashmem's vfs_fallocate() could hit precisely the VM_BUG_ON_VMA(), once it's vma_is_anonymous(). Signed-off-by: Hugh Dickins Cc: John Stultz Cc: Kirill Shutemov Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5539b1975091..1d61c5fe12ef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1417,11 +1417,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { - if (next - addr != HPAGE_PMD_SIZE) { - VM_BUG_ON_VMA(vma_is_anonymous(vma) && - !rwsem_is_locked(&tlb->mm->mmap_sem), vma); + if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); - } else if (zap_huge_pmd(tlb, vma, pmd, addr)) + else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ } -- GitLab From 28013eecf6a0f04c121912f57aed03db55493dfe Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 2 Aug 2018 16:45:15 +0100 Subject: [PATCH 1107/1291] Squashfs: Compute expected length from inode size rather than block length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a3f94cb99a854fa381fe7fadd97c4f61633717a5 ] Previously in squashfs_readpage() when copying data into the page cache, it used the length of the datablock read from the filesystem (after decompression). However, if the filesystem has been corrupted this data block may be short, which will leave pages unfilled. The fix for this is to compute the expected number of bytes to copy from the inode size, and use this to detect if the block is short. Signed-off-by: Phillip Lougher Tested-by: Willy Tarreau Cc: Анатолий Тросиненко Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/file.c | 25 ++++++++++--------------- fs/squashfs/file_cache.c | 4 ++-- fs/squashfs/file_direct.c | 16 +++++++++++----- fs/squashfs/squashfs.h | 2 +- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index cce3060650ae..f1c1430ae721 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -431,10 +431,9 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) +static int squashfs_readpage_fragment(struct page *page, int expected) { struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -445,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, i_size_read(inode) & - (msblk->block_size - 1), + squashfs_copy_cache(page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +static int squashfs_readpage_sparse(struct page *page, int expected) { - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - - squashfs_copy_cache(page, NULL, bytes, 0); + squashfs_copy_cache(page, NULL, expected, 0); return 0; } @@ -471,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page) struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; + int expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; int res; void *pageaddr; @@ -489,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page) goto error_out; if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize); + res = squashfs_readpage_block(page, block, bsize, expected); } else - res = squashfs_readpage_fragment(page); + res = squashfs_readpage_fragment(page, expected); if (!res) return 0; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index f2310d2a2019..a9ba8d96776a 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -20,7 +20,7 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) { struct inode *i = page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, @@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize) ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, buffer->length, 0); + squashfs_copy_cache(page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 096990254a2e..80db1b86a27c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -21,10 +21,11 @@ #include "page_actor.h" static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); + int pages, struct page **page, int bytes); /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) { struct inode *inode = target_page->mapping->host; @@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * using an intermediate buffer. */ res = squashfs_read_cache(target_page, block, bsize, pages, - page); + page, expected); if (res < 0) goto mark_errored; @@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (res < 0) goto mark_errored; + if (res != expected) { + res = -EIO; + goto mark_errored; + } + /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { @@ -138,12 +144,12 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) + int pages, struct page **page, int bytes) { struct inode *i = target_page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; + int res = buffer->error, n, offset = 0; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d8d43724cf2a..f89f8a74c6ce 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -72,7 +72,7 @@ void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +extern int squashfs_readpage_block(struct page *, u64, int, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); -- GitLab From 47041cf42a47492ac013f72aba2145711f7f3fc5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Aug 2018 18:22:41 +0100 Subject: [PATCH 1108/1291] drivers: net: lmc: fix case value for target abort error [ Upstream commit afb41bb039656f0cecb54eeb8b2e2088201295f5 ] Current value for a target abort error is 0x010, however, this value should in fact be 0x002. As it stands, the range of error is 0..7 so it is currently never being detected. This bug has been in the driver since the early 2.6.12 days (or before). Detected by CoverityScan, CID#744290 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 4698450c77d1..bb43d176eb4e 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1371,7 +1371,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ case 0x001: printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); break; - case 0x010: + case 0x002: printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); break; default: -- GitLab From 1d7bf02d716d353924b963e3f24508416701a382 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 2 Aug 2018 15:36:01 -0700 Subject: [PATCH 1109/1291] memcg: remove memcg_cgroup::id from IDR on mem_cgroup_css_alloc() failure [ Upstream commit 7e97de0b033bcac4fa9a35cef72e0c06e6a22c67 ] In case of memcg_online_kmem() failure, memcg_cgroup::id remains hashed in mem_cgroup_idr even after memcg memory is freed. This leads to leak of ID in mem_cgroup_idr. This patch adds removal into mem_cgroup_css_alloc(), which fixes the problem. For better readability, it adds a generic helper which is used in mem_cgroup_alloc() and mem_cgroup_id_put_many() as well. Link: http://lkml.kernel.org/r/152354470916.22460.14397070748001974638.stgit@localhost.localdomain Fixes 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Kirill Tkhai Acked-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index db69d938e9ed..6a9a7e1066ef 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4110,6 +4110,14 @@ static struct cftype mem_cgroup_legacy_files[] = { static DEFINE_IDR(mem_cgroup_idr); +static void mem_cgroup_id_remove(struct mem_cgroup *memcg) +{ + if (memcg->id.id > 0) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + } +} + static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); @@ -4120,8 +4128,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) < n); if (atomic_sub_and_test(n, &memcg->id.ref)) { - idr_remove(&mem_cgroup_idr, memcg->id.id); - memcg->id.id = 0; + mem_cgroup_id_remove(memcg); /* Memcg ID pins CSS */ css_put(&memcg->css); @@ -4258,8 +4265,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: - if (memcg->id.id > 0) - idr_remove(&mem_cgroup_idr, memcg->id.id); + mem_cgroup_id_remove(memcg); __mem_cgroup_free(memcg); return NULL; } @@ -4318,6 +4324,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; fail: + mem_cgroup_id_remove(memcg); mem_cgroup_free(memcg); return ERR_PTR(-ENOMEM); } -- GitLab From fbb37b72489dba442b3bd55a4508a12893ae9c9d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 12 Jul 2018 17:25:06 +0200 Subject: [PATCH 1110/1291] gpiolib-acpi: make sure we trigger edge events at least once on boot [ Upstream commit ca876c7483b697b498868b1f575997191b077885 ] On some systems using edge triggered ACPI Event Interrupts, the initial state at boot is not setup by the firmware, instead relying on the edge irq event handler running at least once to setup the initial state. 2 known examples of this are: 1) The Surface 3 has its _LID state controlled by an ACPI operation region triggered by a GPIO event: OperationRegion (GPOR, GeneralPurposeIo, Zero, One) Field (GPOR, ByteAcc, NoLock, Preserve) { Connection ( GpioIo (Shared, PullNone, 0x0000, 0x0000, IoRestrictionNone, "\\_SB.GPO0", 0x00, ResourceConsumer, , ) { // Pin list 0x004C } ), HELD, 1 } Method (_E4C, 0, Serialized) // _Exx: Edge-Triggered GPE { If ((HELD == One)) { ^^LID.LIDB = One } Else { ^^LID.LIDB = Zero Notify (LID, 0x80) // Status Change } Notify (^^PCI0.SPI1.NTRG, One) // Device Check } Currently, the state of LIDB is wrong until the user actually closes or open the cover. We need to trigger the GPIO event once to update the internal ACPI state. Coincidentally, this also enables the Surface 2 integrated HID sensor hub which also requires an ACPI gpio operation region to start initialization. 2) Various Bay Trail based tablets come with an external USB mux and TI T1210B USB phy to enable USB gadget mode. The mux is controlled by a GPIO which is controlled by an edge triggered ACPI Event Interrupt which monitors the micro-USB ID pin. When the tablet is connected to a PC (or no cable is plugged in), the ID pin is high and the tablet should be in gadget mode. But the GPIO controlling the mux is initialized by the firmware so that the USB data lines are muxed to the host controller. This means that if the user wants to use gadget mode, the user needs to first plug in a host-cable to force the ID pin low and then unplug it and connect the tablet to a PC, to get the ACPI event handler to run and switch the mux to device mode, This commit fixes both by running the event-handler once on boot. Note that the running of the event-handler is done from a late_initcall, this is done because the handler AML code may rely on OperationRegions registered by other builtin drivers. This avoids errors like these: [ 0.133026] ACPI Error: No handler for Region [XSCG] ((____ptrval____)) [GenericSerialBus] (20180531/evregion-132) [ 0.133036] ACPI Error: Region GenericSerialBus (ID=9) has no handler (20180531/exfldio-265) [ 0.133046] ACPI Error: Method parse/execution failed \_SB.GPO2._E12, AE_NOT_EXIST (20180531/psparse-516) Signed-off-by: Benjamin Tissoires [hdegoede: Document BYT USB mux reliance on initial trigger] [hdegoede: Run event handler from a late_initcall, rather then immediately] Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 56 ++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index d6f3d9ee1350..70b3c556f6cf 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -25,6 +25,7 @@ struct acpi_gpio_event { struct list_head node; + struct list_head initial_sync_list; acpi_handle handle; unsigned int pin; unsigned int irq; @@ -50,6 +51,9 @@ struct acpi_gpio_chip { struct list_head events; }; +static LIST_HEAD(acpi_gpio_initial_sync_list); +static DEFINE_MUTEX(acpi_gpio_initial_sync_list_lock); + static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) { if (!gc->parent) @@ -142,6 +146,21 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) return gpiochip_get_desc(chip, offset); } +static void acpi_gpio_add_to_initial_sync_list(struct acpi_gpio_event *event) +{ + mutex_lock(&acpi_gpio_initial_sync_list_lock); + list_add(&event->initial_sync_list, &acpi_gpio_initial_sync_list); + mutex_unlock(&acpi_gpio_initial_sync_list_lock); +} + +static void acpi_gpio_del_from_initial_sync_list(struct acpi_gpio_event *event) +{ + mutex_lock(&acpi_gpio_initial_sync_list_lock); + if (!list_empty(&event->initial_sync_list)) + list_del_init(&event->initial_sync_list); + mutex_unlock(&acpi_gpio_initial_sync_list_lock); +} + static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; @@ -193,7 +212,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, irq_handler_t handler = NULL; struct gpio_desc *desc; unsigned long irqflags; - int ret, pin, irq; + int ret, pin, irq, value; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; @@ -228,6 +247,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); + value = gpiod_get_value(desc); + ret = gpiochip_lock_as_irq(chip, pin); if (ret) { dev_err(chip->parent, "Failed to lock GPIO as interrupt\n"); @@ -269,6 +290,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, event->irq = irq; event->pin = pin; event->desc = desc; + INIT_LIST_HEAD(&event->initial_sync_list); ret = request_threaded_irq(event->irq, NULL, handler, irqflags, "ACPI:Event", event); @@ -283,6 +305,18 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, enable_irq_wake(irq); list_add_tail(&event->node, &acpi_gpio->events); + + /* + * Make sure we trigger the initial state of the IRQ when using RISING + * or FALLING. Note we run the handlers on late_init, the AML code + * may refer to OperationRegions from other (builtin) drivers which + * may be probed after us. + */ + if (handler == acpi_gpio_irq_handler && + (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || + ((irqflags & IRQF_TRIGGER_FALLING) && value == 0))) + acpi_gpio_add_to_initial_sync_list(event); + return AE_OK; fail_free_event: @@ -355,6 +389,8 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; + acpi_gpio_del_from_initial_sync_list(event); + if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) disable_irq_wake(event->irq); @@ -1210,3 +1246,21 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) return con_id == NULL; } + +/* Sync the initial state of handlers after all builtin drivers have probed */ +static int acpi_gpio_initial_sync(void) +{ + struct acpi_gpio_event *event, *ep; + + mutex_lock(&acpi_gpio_initial_sync_list_lock); + list_for_each_entry_safe(event, ep, &acpi_gpio_initial_sync_list, + initial_sync_list) { + acpi_evaluate_object(event->handle, NULL, NULL, NULL); + list_del_init(&event->initial_sync_list); + } + mutex_unlock(&acpi_gpio_initial_sync_list_lock); + + return 0; +} +/* We must use _sync so that this runs after the first deferred_probe run */ +late_initcall_sync(acpi_gpio_initial_sync); -- GitLab From 95239b2db50f301b19da9f15df724e129e9b8ff5 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 31 Jul 2018 15:46:01 +0200 Subject: [PATCH 1111/1291] scsi: fcoe: fix use-after-free in fcoe_ctlr_els_send [ Upstream commit 2d7d4fd35e6e15b47c13c70368da83add19f01e7 ] KASAN reports a use-after-free in fcoe_ctlr_els_send() when we're sending a LOGO and have FIP debugging enabled. This is because we're first freeing the skb and then printing the frame's DID. But the DID is a member of the FC frame header which in turn is the skb's payload. Exchange the debug print and kfree_skb() calls so we're not touching the freed data. Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fcoe/fcoe_ctlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index fff6f1851dc1..d38bda333f7a 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -799,9 +799,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, fip->send(fip, skb); return -EINPROGRESS; drop: - kfree_skb(skb); LIBFCOE_FIP_DBG(fip, "drop els_send op %u d_id %x\n", op, ntoh24(fh->fh_d_id)); + kfree_skb(skb); return -EINVAL; } EXPORT_SYMBOL(fcoe_ctlr_els_send); -- GitLab From a67aef68ef24aaa11317662768733c2390e78d07 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 31 Jul 2018 15:46:02 +0200 Subject: [PATCH 1112/1291] scsi: fcoe: drop frames in ELS LOGO error path [ Upstream commit 63d0e3dffda311e77b9a8c500d59084e960a824a ] Drop the frames in the ELS LOGO error path instead of just returning an error. This fixes the following kmemleak report: unreferenced object 0xffff880064cb1000 (size 424): comm "kworker/0:2", pid 24, jiffies 4294904293 (age 68.504s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<(____ptrval____)>] _fc_frame_alloc+0x2c/0x180 [libfc] [<(____ptrval____)>] fc_lport_enter_logo+0x106/0x360 [libfc] [<(____ptrval____)>] fc_fabric_logoff+0x8c/0xc0 [libfc] [<(____ptrval____)>] fcoe_if_destroy+0x79/0x3b0 [fcoe] [<(____ptrval____)>] fcoe_destroy_work+0xd2/0x170 [fcoe] [<(____ptrval____)>] process_one_work+0x7ff/0x1420 [<(____ptrval____)>] worker_thread+0x87/0xef0 [<(____ptrval____)>] kthread+0x2db/0x390 [<(____ptrval____)>] ret_from_fork+0x35/0x40 [<(____ptrval____)>] 0xffffffffffffffff which can be triggered by issuing echo eth0 > /sys/bus/fcoe/ctlr_destroy Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index d38bda333f7a..03019e07abb9 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -754,9 +754,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, case ELS_LOGO: if (fip->mode == FIP_MODE_VN2VN) { if (fip->state != FIP_ST_VNMP_UP) - return -EINVAL; + goto drop; if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI) - return -EINVAL; + goto drop; } else { if (fip->state != FIP_ST_ENABLED) return 0; -- GitLab From 4ce46fff750dbee21034080be293dde19f1697ff Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 31 Jul 2018 15:46:03 +0200 Subject: [PATCH 1113/1291] scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO [ Upstream commit 1550ec458e0cf1a40a170ab1f4c46e3f52860f65 ] When receiving a LOGO request we forget to clear the FC_RP_STARTED flag before starting the rport delete routine. As the started flag was not cleared, we're not deleting the rport but waiting for a restart and thus are keeping the reference count of the rdata object at 1. This leads to the following kmemleak report: unreferenced object 0xffff88006542aa00 (size 512): comm "kworker/0:2", pid 24, jiffies 4294899222 (age 226.880s) hex dump (first 32 bytes): 68 96 fe 65 00 88 ff ff 00 00 00 00 00 00 00 00 h..e............ 01 00 00 00 08 00 00 00 02 c5 45 24 ac b8 00 10 ..........E$.... backtrace: [<(____ptrval____)>] fcoe_ctlr_vn_add.isra.5+0x7f/0x770 [libfcoe] [<(____ptrval____)>] fcoe_ctlr_vn_recv+0x12af/0x27f0 [libfcoe] [<(____ptrval____)>] fcoe_ctlr_recv_work+0xd01/0x32f0 [libfcoe] [<(____ptrval____)>] process_one_work+0x7ff/0x1420 [<(____ptrval____)>] worker_thread+0x87/0xef0 [<(____ptrval____)>] kthread+0x2db/0x390 [<(____ptrval____)>] ret_from_fork+0x35/0x40 [<(____ptrval____)>] 0xffffffffffffffff Signed-off-by: Johannes Thumshirn Reported-by: ard Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libfc/fc_rport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 31d31aad3de1..89b1f1af2fd4 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -2164,6 +2164,7 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp) FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", fc_rport_state(rdata)); + rdata->flags &= ~FC_RP_STARTED; fc_rport_enter_delete(rdata, RPORT_EV_STOP); mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); -- GitLab From 7bb880a11650bcc761806dbd1484c887c2617f73 Mon Sep 17 00:00:00 2001 From: Jim Gill Date: Thu, 2 Aug 2018 14:13:30 -0700 Subject: [PATCH 1114/1291] scsi: vmw_pvscsi: Return DID_RESET for status SAM_STAT_COMMAND_TERMINATED [ Upstream commit e95153b64d03c2b6e8d62e51bdcc33fcad6e0856 ] Commands that are reset are returned with status SAM_STAT_COMMAND_TERMINATED. PVSCSI currently returns DID_OK | SAM_STAT_COMMAND_TERMINATED which fails the command. Instead, set hostbyte to DID_RESET to allow upper layers to retry. Tested by copying a large file between two pvscsi disks on same adapter while performing a bus reset at 1-second intervals. Before fix, commands sometimes fail with DID_OK. After fix, commands observed to fail with DID_RESET. Signed-off-by: Jim Gill Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/vmw_pvscsi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 777e5f1e52d1..0cd947f78b5b 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -561,9 +561,14 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, (btstat == BTSTAT_SUCCESS || btstat == BTSTAT_LINKED_COMMAND_COMPLETED || btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) { - cmd->result = (DID_OK << 16) | sdstat; - if (sdstat == SAM_STAT_CHECK_CONDITION && cmd->sense_buffer) - cmd->result |= (DRIVER_SENSE << 24); + if (sdstat == SAM_STAT_COMMAND_TERMINATED) { + cmd->result = (DID_RESET << 16); + } else { + cmd->result = (DID_OK << 16) | sdstat; + if (sdstat == SAM_STAT_CHECK_CONDITION && + cmd->sense_buffer) + cmd->result |= (DRIVER_SENSE << 24); + } } else switch (btstat) { case BTSTAT_SUCCESS: -- GitLab From cf7ab2abc524ec8499fabfa064d9c1f754092908 Mon Sep 17 00:00:00 2001 From: "jie@chenjie6@huwei.com" Date: Fri, 10 Aug 2018 17:23:06 -0700 Subject: [PATCH 1115/1291] mm/memory.c: check return value of ioremap_prot [ Upstream commit 24eee1e4c47977bdfb71d6f15f6011e7b6188d04 ] ioremap_prot() can return NULL which could lead to an oops. Link: http://lkml.kernel.org/r/1533195441-58594-1-git-send-email-chenjie6@huawei.com Signed-off-by: chen jie Reviewed-by: Andrew Morton Cc: Li Zefan Cc: chenjie Cc: Yang Shi Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 1d61c5fe12ef..72e1dfa84819 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4348,6 +4348,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); + if (!maddr) + return -ENOMEM; + if (write) memcpy_toio(maddr + offset, buf, len); else -- GitLab From cfcfbe08d2986ecd5621e98a027e54dd1d549459 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 9 Jul 2018 12:21:44 +0300 Subject: [PATCH 1116/1291] mei: don't update offset in write commit a103af1b64d74853a5e08ca6c86aeb0e5c6ca4f1 upstream. MEI enables writes of complete messages only while read can be performed in parts, hence write should not update the file offset to not break interleaving partial reads with writes. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 22efc039f302..8d1d40dbf744 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -291,7 +291,6 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, goto out; } - *offset = 0; cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file); if (!cb) { rets = -ENOMEM; -- GitLab From cba34b94077304d5a632286aad14ccbab68a5f98 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Jun 2018 18:46:40 -0500 Subject: [PATCH 1117/1291] cifs: add missing debug entries for kconfig options commit 950132afd59385caf6e2b84e5235d069fa10681d upstream. /proc/fs/cifs/DebugData displays the features (Kconfig options) used to build cifs.ko but it was missing some, and needed comma separator. These can be useful in debugging certain problems so we know which optional features were enabled in the user's build. Also clarify them, by making them more closely match the corresponding CONFIG_CIFS_* parm. Old format: Features: dfs fscache posix spnego xattr acl New format: Features: DFS,FSCACHE,SMB_DIRECT,STATS,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Reviewed-by: Paulo Alcantara CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index cbb9534b89b4..53c9c49f0fbb 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -123,25 +123,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); seq_printf(m, "Features:"); #ifdef CONFIG_CIFS_DFS_UPCALL - seq_printf(m, " dfs"); + seq_printf(m, " DFS"); #endif #ifdef CONFIG_CIFS_FSCACHE - seq_printf(m, " fscache"); + seq_printf(m, ",FSCACHE"); +#endif +#ifdef CONFIG_CIFS_SMB_DIRECT + seq_printf(m, ",SMB_DIRECT"); +#endif +#ifdef CONFIG_CIFS_STATS2 + seq_printf(m, ",STATS2"); +#elif defined(CONFIG_CIFS_STATS) + seq_printf(m, ",STATS"); +#endif +#ifdef CONFIG_CIFS_DEBUG2 + seq_printf(m, ",DEBUG2"); +#elif defined(CONFIG_CIFS_DEBUG) + seq_printf(m, ",DEBUG"); +#endif +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + seq_printf(m, ",ALLOW_INSECURE_LEGACY"); #endif #ifdef CONFIG_CIFS_WEAK_PW_HASH - seq_printf(m, " lanman"); + seq_printf(m, ",WEAK_PW_HASH"); #endif #ifdef CONFIG_CIFS_POSIX - seq_printf(m, " posix"); + seq_printf(m, ",CIFS_POSIX"); #endif #ifdef CONFIG_CIFS_UPCALL - seq_printf(m, " spnego"); + seq_printf(m, ",UPCALL(SPNEGO)"); #endif #ifdef CONFIG_CIFS_XATTR - seq_printf(m, " xattr"); + seq_printf(m, ",XATTR"); #endif #ifdef CONFIG_CIFS_ACL - seq_printf(m, " acl"); + seq_printf(m, ",ACL"); #endif seq_putc(m, '\n'); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); -- GitLab From d5f2790a7a1ec89f237ff6e8a261927a51d09128 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 23 Aug 2018 12:24:02 +0200 Subject: [PATCH 1118/1291] cifs: check kmalloc before use commit 126c97f4d0d1b5b956e8b0740c81a2b2a2ae548c upstream. The kmalloc was not being checked - if it fails issue a warning and return -ENOMEM to the caller. Signed-off-by: Nicholas Mc Guire Fixes: b8da344b74c8 ("cifs: dynamic allocation of ntlmssp blob") Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky cc: Stable ` Signed-off-by: Greg Kroah-Hartman --- fs/cifs/sess.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 8b0502cd39af..aa23c00367ec 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -398,6 +398,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, goto setup_ntlmv2_ret; } *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + if (!*pbuffer) { + rc = -ENOMEM; + cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); -- GitLab From 82a856f527334ffd69aae26e7dd9e03b19c4a520 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 9 Aug 2018 14:33:12 -0500 Subject: [PATCH 1119/1291] smb3: enumerating snapshots was leaving part of the data off end commit e02789a53d71334b067ad72eee5d4e88a0158083 upstream. When enumerating snapshots, the last few bytes of the final snapshot could be left off since we were miscalculating the length returned (leaving off the sizeof struct SRV_SNAPSHOT_ARRAY) See MS-SMB2 section 2.2.32.2. In addition fixup the length used to allow smaller buffer to be passed in, in order to allow returning the size of the whole snapshot array more easily. Sample userspace output with a kernel patched with this (mounted to a Windows volume with two snapshots). Before this patch, the second snapshot would be missing a few bytes at the end. ~/cifs-2.6# ~/enum-snapshots /mnt/file press enter to issue the ioctl to retrieve snapshot information ... size of snapshot array = 102 Num snapshots: 2 Num returned: 2 Array Size: 102 Snapshot 0:@GMT-2018.06.30-19.34.17 Snapshot 1:@GMT-2018.06.30-19.33.37 CC: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 83267ac3a3f0..badbb50140b1 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1129,6 +1129,13 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, } +/* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */ +#define GMT_TOKEN_SIZE 50 + +/* + * Input buffer contains (empty) struct smb_snapshot array with size filled in + * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2 + */ static int smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, void __user *ioc_buf) @@ -1158,14 +1165,27 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, kfree(retbuf); return rc; } - if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) { - rc = -ERANGE; - kfree(retbuf); - return rc; - } - if (ret_data_len > snapshot_in.snapshot_array_size) - ret_data_len = snapshot_in.snapshot_array_size; + /* + * Check for min size, ie not large enough to fit even one GMT + * token (snapshot). On the first ioctl some users may pass in + * smaller size (or zero) to simply get the size of the array + * so the user space caller can allocate sufficient memory + * and retry the ioctl again with larger array size sufficient + * to hold all of the snapshot GMT tokens on the second try. + */ + if (snapshot_in.snapshot_array_size < GMT_TOKEN_SIZE) + ret_data_len = sizeof(struct smb_snapshot_array); + + /* + * We return struct SRV_SNAPSHOT_ARRAY, followed by + * the snapshot array (of 50 byte GMT tokens) each + * representing an available previous version of the data + */ + if (ret_data_len > (snapshot_in.snapshot_array_size + + sizeof(struct smb_snapshot_array))) + ret_data_len = snapshot_in.snapshot_array_size + + sizeof(struct smb_snapshot_array); if (copy_to_user(ioc_buf, retbuf, ret_data_len)) rc = -EFAULT; -- GitLab From be1210c7758cb0aba9c4323fb2fe79e64568db77 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 2 Aug 2018 20:28:18 -0500 Subject: [PATCH 1120/1291] smb3: Do not send SMB3 SET_INFO if nothing changed commit fd09b7d3b352105f08b8e02f7afecf7e816380ef upstream. An earlier commit had a typo which prevented the optimization from working: commit 18dd8e1a65dd ("Do not send SMB3 SET_INFO request if nothing is changing") Thank you to Metze for noticing this. Also clear a reserved field in the FILE_BASIC_INFO struct we send that should be zero (all the other fields in that struct were set or cleared explicitly already in cifs_set_file_info). Reviewed-by: Pavel Shilovsky CC: Stable # 4.9.x+ Reported-by: Stefan Metzmacher Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 2 ++ fs/cifs/smb2inode.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0c7b7e2a0919..caf9cf91b825 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1122,6 +1122,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (!server->ops->set_file_info) return -ENOSYS; + info_buf.Pad = 0; + if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1238cd3552f9..0267d8cbc996 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -267,7 +267,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, int rc; if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && - (buf->LastWriteTime == 0) && (buf->ChangeTime) && + (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) && (buf->Attributes == 0)) return 0; /* would be a no op, no sense sending this */ -- GitLab From 66913d23eeda437ffb8651e5d68e612a80e5b152 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 27 Jul 2018 22:01:49 -0500 Subject: [PATCH 1121/1291] smb3: don't request leases in symlink creation and query commit 22783155f4bf956c346a81624ec9258930a6fe06 upstream. Fixes problem pointed out by Pavel in discussions about commit 729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable # 3.18.x+ Signed-off-by: Greg Kroah-Hartman --- fs/cifs/link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 889a840172eb..9451a7f6893d 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -396,7 +396,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_II; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct smb2_file_all_info *pfile_info = NULL; oparms.tcon = tcon; @@ -458,7 +458,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int create_options = CREATE_NOT_DIR; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; if (backup_cred(cifs_sb)) -- GitLab From 758f55f918333f22d84f2f73e279f58bb5b81515 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 24 Jun 2018 23:18:52 -0500 Subject: [PATCH 1122/1291] smb3: fill in statfs fsid and correct namelen commit 21ba3845b59c733a79ed4fe1c4f3732e7ece9df7 upstream. Fil in the correct namelen (typically 255 not 4096) in the statfs response and also fill in a reasonably unique fsid (in this case taken from the volume id, and the creation time of the volume). In the case of the POSIX statfs all fields are now filled in, and in the case of non-POSIX mounts, all fields are filled in which can be. Signed-off-by: Steve French CC: Stable Reviewed-by: Aurelien Aptel Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.c | 18 ++++++++++-------- fs/cifs/smb2ops.c | 2 ++ fs/cifs/smb2pdu.c | 8 ++++++++ fs/cifs/smb2pdu.h | 11 +++++++++++ 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 490c5fc9e69c..44a7b2dea688 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -197,14 +197,16 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) xid = get_xid(); - /* - * PATH_MAX may be too long - it would presumably be total path, - * but note that some servers (includinng Samba 3) have a shorter - * maximum path. - * - * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO. - */ - buf->f_namelen = PATH_MAX; + if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0) + buf->f_namelen = + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength); + else + buf->f_namelen = PATH_MAX; + + buf->f_fsid.val[0] = tcon->vol_serial_number; + /* are using part of create time for more randomness, see man statfs */ + buf->f_fsid.val[1] = (int)le64_to_cpu(tcon->vol_create_time); + buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index badbb50140b1..e9f246fe9d80 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -332,6 +332,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) FS_ATTRIBUTE_INFORMATION); SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_DEVICE_INFORMATION); + SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, + FS_VOLUME_INFORMATION); SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 71b81980787f..e317e9a400c1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3455,6 +3455,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, } else if (level == FS_SECTOR_SIZE_INFORMATION) { max_len = sizeof(struct smb3_fs_ss_info); min_len = sizeof(struct smb3_fs_ss_info); + } else if (level == FS_VOLUME_INFORMATION) { + max_len = sizeof(struct smb3_fs_vol_info) + MAX_VOL_LABEL_LEN; + min_len = sizeof(struct smb3_fs_vol_info); } else { cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level); return -EINVAL; @@ -3495,6 +3498,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, tcon->ss_flags = le32_to_cpu(ss_info->Flags); tcon->perf_sector_size = le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf); + } else if (level == FS_VOLUME_INFORMATION) { + struct smb3_fs_vol_info *vol_info = (struct smb3_fs_vol_info *) + (offset + (char *)rsp); + tcon->vol_serial_number = vol_info->VolumeSerialNumber; + tcon->vol_create_time = vol_info->VolumeCreationTime; } qfsattr_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c2ec934be968..e52454059725 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1108,6 +1108,17 @@ struct smb3_fs_ss_info { __le32 ByteOffsetForPartitionAlignment; } __packed; +/* volume info struct - see MS-FSCC 2.5.9 */ +#define MAX_VOL_LABEL_LEN 32 +struct smb3_fs_vol_info { + __le64 VolumeCreationTime; + __u32 VolumeSerialNumber; + __le32 VolumeLabelLength; /* includes trailing null */ + __u8 SupportsObjects; /* True if eg like NTFS, supports objects */ + __u8 Reserved; + __u8 VolumeLabel[0]; /* variable len */ +} __packed; + /* partial list of QUERY INFO levels */ #define FILE_DIRECTORY_INFORMATION 1 #define FILE_FULL_DIRECTORY_INFORMATION 2 -- GitLab From 770025cc4b6918f6ef2591146920d693a13be168 Mon Sep 17 00:00:00 2001 From: Ethan Lien Date: Mon, 2 Jul 2018 15:44:58 +0800 Subject: [PATCH 1123/1291] btrfs: use correct compare function of dirty_metadata_bytes commit d814a49198eafa6163698bdd93961302f3a877a4 upstream. We use customized, nodesize batch value to update dirty_metadata_bytes. We should also use batch version of compare function or we will easily goto fast path and get false result from percpu_counter_compare(). Fixes: e2d845211eda ("Btrfs: use percpu counter for dirty metadata count") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Ethan Lien Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b475d1ebbbbf..5cf1bbe9754c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1098,8 +1098,9 @@ static int btree_writepages(struct address_space *mapping, fs_info = BTRFS_I(mapping->host)->root->fs_info; /* this is a bit racy, but that's ok */ - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + fs_info->dirty_metadata_batch); if (ret < 0) return 0; } @@ -4030,8 +4031,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info, if (flush_delayed) btrfs_balance_delayed_items(fs_info); - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + fs_info->dirty_metadata_batch); if (ret > 0) { balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); } -- GitLab From e7457f97d2af26bfdda028b691bc11363296b3a9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 19 Jul 2018 10:49:51 -0400 Subject: [PATCH 1124/1291] btrfs: don't leak ret from do_chunk_alloc commit 4559b0a71749c442d34f7cfb9e72c9e58db83948 upstream. If we're trying to make a data reservation and we have to allocate a data chunk we could leak ret == 1, as do_chunk_alloc() will return 1 if it allocated a chunk. Since the end of the function is the success path just return 0. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53487102081d..bbabe37c2e8c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4407,7 +4407,7 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); - return ret; + return 0; } int btrfs_check_data_free_space(struct inode *inode, -- GitLab From f91ca31f5379a6979d0a83aaac215def130ccb15 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Jul 2018 11:46:10 -0700 Subject: [PATCH 1125/1291] Btrfs: fix btrfs_write_inode vs delayed iput deadlock commit 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f upstream. We recently ran into the following deadlock involving btrfs_write_inode(): [ +0.005066] __schedule+0x38e/0x8c0 [ +0.007144] schedule+0x36/0x80 [ +0.006447] bit_wait+0x11/0x60 [ +0.006446] __wait_on_bit+0xbe/0x110 [ +0.007487] ? bit_wait_io+0x60/0x60 [ +0.007319] __inode_wait_for_writeback+0x96/0xc0 [ +0.009568] ? autoremove_wake_function+0x40/0x40 [ +0.009565] inode_wait_for_writeback+0x21/0x30 [ +0.009224] evict+0xb0/0x190 [ +0.006099] iput+0x1a8/0x210 [ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0 [ +0.009047] btrfs_commit_transaction+0x799/0x8c0 [ +0.009567] btrfs_write_inode+0x81/0xb0 [ +0.008008] __writeback_single_inode+0x267/0x320 [ +0.009569] writeback_sb_inodes+0x25b/0x4e0 [ +0.008702] wb_writeback+0x102/0x2d0 [ +0.007487] wb_workfn+0xa4/0x310 [ +0.006794] ? wb_workfn+0xa4/0x310 [ +0.007143] process_one_work+0x150/0x410 [ +0.008179] worker_thread+0x6d/0x520 [ +0.007490] kthread+0x12c/0x160 [ +0.006620] ? put_pwq_unlocked+0x80/0x80 [ +0.008185] ? kthread_park+0xa0/0xa0 [ +0.007484] ? do_syscall_64+0x53/0x150 [ +0.007837] ret_from_fork+0x29/0x40 Writeback calls: btrfs_write_inode btrfs_commit_transaction btrfs_run_delayed_iputs If iput() is called on that same inode, evict() will wait for writeback forever. btrfs_write_inode() was originally added way back in 4730a4bc5bf3 ("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode() hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so btrfs_write_inode() is actually unnecessary (and leads to a bunch of unnecessary commits). Get rid of it, which also gets rid of the deadlock. CC: stable@vger.kernel.org # 3.2+ Signed-off-by: Josef Bacik [Omar: new commit message] Signed-off-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 26 -------------------------- fs/btrfs/super.c | 1 - 2 files changed, 27 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 28a58f40f3a4..e8bfafa25a71 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6152,32 +6152,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) return ret; } -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - bool nolock = false; - - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) - return 0; - - if (btrfs_fs_closing(root->fs_info) && - btrfs_is_free_space_inode(BTRFS_I(inode))) - nolock = true; - - if (wbc->sync_mode == WB_SYNC_ALL) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans); - } - return ret; -} - /* * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8e3ce81d3f44..fe960d5e8913 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2271,7 +2271,6 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, - .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, -- GitLab From b48522b7887a2a0fc387e6f6b2dd84c9741d4fbd Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 19 Jul 2018 23:23:56 +0530 Subject: [PATCH 1126/1291] iommu/arm-smmu: Error out only if not enough context interrupts commit d1e20222d5372e951bbb2fd3f6489ec4a6ea9b11 upstream. Currently we check if the number of context banks is not equal to num_context_interrupts. However, there are booloaders such as, one on sdm845 that reserves few context banks and thus kernel views less than the total available context banks. So, although the hardware definition in device tree would mention the correct number of context interrupts, this number can be greater than the number of context banks visible to smmu in kernel. We should therefore error out only when the number of context banks is greater than the available number of context interrupts. Signed-off-by: Vivek Gautam Suggested-by: Tomasz Figa Cc: Robin Murphy Cc: Will Deacon [will: drop useless printk] Signed-off-by: Will Deacon Cc: Jitendra Bhivare Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3bdb799d3b4b..2c436376f13e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2100,12 +2100,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (err) return err; - if (smmu->version == ARM_SMMU_V2 && - smmu->num_context_banks != smmu->num_context_irqs) { - dev_err(dev, - "found only %d context interrupt(s) but %d required\n", - smmu->num_context_irqs, smmu->num_context_banks); - return -ENODEV; + if (smmu->version == ARM_SMMU_V2) { + if (smmu->num_context_banks > smmu->num_context_irqs) { + dev_err(dev, + "found only %d context irq(s) but %d required\n", + smmu->num_context_irqs, smmu->num_context_banks); + return -ENODEV; + } + + /* Ignore superfluous interrupts */ + smmu->num_context_irqs = smmu->num_context_banks; } for (i = 0; i < smmu->num_global_irqs; ++i) { -- GitLab From 646e7c04803fb98fee2b465480caf78a1fb0173f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 27 Jun 2018 16:08:15 +0200 Subject: [PATCH 1127/1291] printk: Split the code for storing a message into the log buffer commit ba552399954dde1b388f7749fecad5c349216981 upstream. It is just a preparation step. The patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20180627140817.27764-2-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 43 +++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 512f7c2baedd..f90af2b6c956 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1680,28 +1680,16 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len); } -asmlinkage int vprintk_emit(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, va_list args) +/* Must be called under logbuf_lock. */ +int vprintk_store(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, va_list args) { static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len; enum log_flags lflags = 0; - unsigned long flags; - int printed_len; - bool in_sched = false; - - if (level == LOGLEVEL_SCHED) { - level = LOGLEVEL_DEFAULT; - in_sched = true; - } - - boot_delay_msec(level); - printk_delay(); - /* This stops the holder of console_sem just where we want him */ - logbuf_lock_irqsave(flags); /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. @@ -1742,8 +1730,29 @@ asmlinkage int vprintk_emit(int facility, int level, if (dict) lflags |= LOG_PREFIX|LOG_NEWLINE; - printed_len = log_output(facility, level, lflags, dict, dictlen, text, text_len); + return log_output(facility, level, lflags, + dict, dictlen, text, text_len); +} +asmlinkage int vprintk_emit(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, va_list args) +{ + int printed_len; + bool in_sched = false; + unsigned long flags; + + if (level == LOGLEVEL_SCHED) { + level = LOGLEVEL_DEFAULT; + in_sched = true; + } + + boot_delay_msec(level); + printk_delay(); + + /* This stops the holder of console_sem just where we want him */ + logbuf_lock_irqsave(flags); + printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ -- GitLab From 943276ef14c7041449ad46e6abf9a0afb4b77122 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 27 Jun 2018 16:08:16 +0200 Subject: [PATCH 1128/1291] printk: Create helper function to queue deferred console handling commit a338f84dc196f44b63ba0863d2f34fd9b1613572 upstream. It is just a preparation step. The patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20180627140817.27764-3-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f90af2b6c956..f0223a7d9ed1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2723,16 +2723,20 @@ void wake_up_klogd(void) preempt_enable(); } -int vprintk_deferred(const char *fmt, va_list args) +void defer_console_output(void) { - int r; - - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); - preempt_disable(); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); preempt_enable(); +} + +int vprintk_deferred(const char *fmt, va_list args) +{ + int r; + + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); + defer_console_output(); return r; } -- GitLab From cd71265a8cd6a41c54dcfd594b0e653740cf1282 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 27 Jun 2018 16:20:28 +0200 Subject: [PATCH 1129/1291] printk/nmi: Prevent deadlock when accessing the main log buffer in NMI commit 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 upstream. The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") brought back the possible deadlocks in printk() and NMI. The check of logbuf_lock is done only in printk_nmi_enter() to prevent mixed output. But another CPU might take the lock later, enter NMI, and: + Both NMIs might be serialized by yet another lock, for example, the one in nmi_cpu_backtrace(). + The other CPU might get stopped in NMI, see smp_send_stop() in panic(). The only safe solution is to use trylock when storing the message into the main log-buffer. It might cause reordering when some lines go to the main lock buffer directly and others are delayed via the per-CPU buffer. It means that it is not useful in general. This patch replaces the problematic NMI deferred context with NMI direct context. It can be used to mark a code that might produce many messages in NMI and the risk of losing them is more critical than problems with eventual reordering. The context is then used when dumping trace buffers on oops. It was the primary motivation for the original fix. Also the reordering is even smaller issue there because some traces have their own time stamps. Finally, nmi_cpu_backtrace() need not longer be serialized because it will always us the per-CPU buffers again. Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 4 +++ kernel/printk/internal.h | 9 +++++- kernel/printk/printk_safe.c | 58 +++++++++++++++++++++++-------------- kernel/trace/trace.c | 4 ++- lib/nmi_backtrace.c | 3 -- 5 files changed, 52 insertions(+), 26 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 335926039adc..6106befed756 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -150,9 +150,13 @@ void early_printk(const char *s, ...) { } #ifdef CONFIG_PRINTK_NMI extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); +extern void printk_nmi_direct_enter(void); +extern void printk_nmi_direct_exit(void); #else static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_direct_enter(void) { } +static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 2a7d04049af4..0f1898820cba 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -19,11 +19,16 @@ #ifdef CONFIG_PRINTK #define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff -#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 +#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000 #define PRINTK_NMI_CONTEXT_MASK 0x80000000 extern raw_spinlock_t logbuf_lock; +__printf(5, 0) +int vprintk_store(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, va_list args); + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); __printf(1, 0) int vprintk_func(const char *fmt, va_list args); @@ -54,6 +59,8 @@ void __printk_safe_exit(void); local_irq_enable(); \ } while (0) +void defer_console_output(void); + #else __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 64825b2df3a5..d482fd61ac67 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -311,24 +311,33 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) void printk_nmi_enter(void) { - /* - * The size of the extra per-CPU buffer is limited. Use it only when - * the main one is locked. If this CPU is not in the safe context, - * the lock must be taken on another CPU and we could wait for it. - */ - if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) && - raw_spin_is_locked(&logbuf_lock)) { - this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); - } else { - this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); - } + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); } void printk_nmi_exit(void) { - this_cpu_and(printk_context, - ~(PRINTK_NMI_CONTEXT_MASK | - PRINTK_NMI_DEFERRED_CONTEXT_MASK)); + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +/* + * Marks a code that might produce many messages in NMI context + * and the risk of losing them is more critical than eventual + * reordering. + * + * It has effect only when called in NMI context. Then printk() + * will try to store the messages into the main logbuf directly + * and use the per-CPU buffers only as a fallback when the lock + * is not available. + */ +void printk_nmi_direct_enter(void) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); +} + +void printk_nmi_direct_exit(void) +{ + this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); } #else @@ -366,6 +375,20 @@ void __printk_safe_exit(void) __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { + /* + * Try to use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && + raw_spin_trylock(&logbuf_lock)) { + int len; + + len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + raw_spin_unlock(&logbuf_lock); + defer_console_output(); + return len; + } + /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) return vprintk_nmi(fmt, args); @@ -374,13 +397,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) return vprintk_safe(fmt, args); - /* - * Use the main logbuf when logbuf_lock is available in NMI. - * But avoid calling console drivers that might have their own locks. - */ - if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) - return vprintk_deferred(fmt, args); - /* No obstacles. */ return vprintk_default(fmt, args); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fbc75c84076e..b7302c37c064 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8187,6 +8187,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tracing_off(); local_irq_save(flags); + printk_nmi_direct_enter(); /* Simulate the iterator */ trace_init_global_iter(&iter); @@ -8266,7 +8267,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) for_each_tracing_cpu(cpu) { atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } - atomic_dec(&dump_running); + atomic_dec(&dump_running); + printk_nmi_direct_exit(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(ftrace_dump); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 46e4c749e4eb..70b1f9d830cd 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool nmi_cpu_backtrace(struct pt_regs *regs) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", cpu, instruction_pointer(regs)); @@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) else dump_stack(); } - arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- GitLab From a8affa695373b4f39c49fe329bb24369281734d3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:38:04 +0900 Subject: [PATCH 1130/1291] kprobes/arm64: Fix %p uses in error messages commit 0722867dcbc28cc9b269b57acd847c7c1aa638d6 upstream. Fix %p uses in error messages by removing it because those are redundant or meaningless. Signed-off-by: Masami Hiramatsu Acked-by: Will Deacon Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Cc: Tobin C . Harding Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491908405.9916.12425053035317241111.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/probes/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d849d9804011..22a5921562c7 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -275,7 +275,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); break; -- GitLab From 5a56b307992e94e520afaf5637afe9f597412b1e Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 15 Aug 2018 12:51:21 -0700 Subject: [PATCH 1131/1291] arm64: mm: check for upper PAGE_SHIFT bits in pfn_valid() commit 5ad356eabc47d26a92140a0c4b20eba471c10de3 upstream. ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input before seeing if the PFN is valid. This leads to false positives when some of the upper bits are set, but the lower bits match a valid PFN. For example, the following userspace code looks up a bogus entry in /proc/kpageflags: int pagemap = open("/proc/self/pagemap", O_RDONLY); int pageflags = open("/proc/kpageflags", O_RDONLY); uint64_t pfn, val; lseek64(pagemap, [...], SEEK_SET); read(pagemap, &pfn, sizeof(pfn)); if (pfn & (1UL << 63)) { /* valid PFN */ pfn &= ((1UL << 55) - 1); /* clear flag bits */ pfn |= (1UL << 55); lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET); read(pageflags, &val, sizeof(val)); } On ARM64 this causes the userspace process to crash with SIGSEGV rather than reading (1 << KPF_NOPAGE). kpageflags_read() treats the offset as valid, and stable_page_flags() will try to access an address between the user and kernel address ranges. Fixes: c1cc1552616d ("arm64: MMU initialisation") Cc: stable@vger.kernel.org Signed-off-by: Greg Hackmann Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1190d90e01e6..caa295cd5d09 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -287,7 +287,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_map_memory(pfn << PAGE_SHIFT); + phys_addr_t addr = pfn << PAGE_SHIFT; + + if ((addr >> PAGE_SHIFT) != pfn) + return 0; + return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); #endif -- GitLab From 75677d72be74c35d1a2d67e283c3b3d2eb0c49eb Mon Sep 17 00:00:00 2001 From: Huibin Hong Date: Fri, 6 Jul 2018 16:03:57 +0800 Subject: [PATCH 1132/1291] arm64: dts: rockchip: corrected uart1 clock-names for rk3328 commit d0414fdd58eb51ffd6528280fd66705123663964 upstream. Corrected the uart clock-names or the uart driver might fail. Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Huibin Hong Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index d70e409e2b0c..efac2202b16e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -331,7 +331,7 @@ uart1: serial@ff120000 { reg = <0x0 0xff120000 0x0 0x100>; interrupts = ; clocks = <&cru SCLK_UART1>, <&cru PCLK_UART1>; - clock-names = "sclk_uart", "pclk_uart"; + clock-names = "baudclk", "apb_pclk"; dmas = <&dmac 4>, <&dmac 5>; #dma-cells = <2>; pinctrl-names = "default"; -- GitLab From 792a039415dc4562918abff4740ed04c2034f55b Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:50 +0100 Subject: [PATCH 1133/1291] KVM: arm/arm64: Skip updating PMD entry if no change commit 86658b819cd0a9aa584cd84453ed268a6f013770 upstream. Contention on updating a PMD entry by a large number of vcpus can lead to duplicate work when handling stage 2 page faults. As the page table update follows the break-before-make requirement of the architecture, it can lead to repeated refaults due to clearing the entry and flushing the tlbs. This problem is more likely when - * there are large number of vcpus * the mapping is large block mapping such as when using PMD hugepages (512MB) with 64k pages. Fix this by skipping the page table update if there is no change in the entry being updated. Cc: stable@vger.kernel.org Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/mmu.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b69798a7880e..d88f1e3fa52c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -901,19 +901,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache pmd = stage2_get_pmd(kvm, cache, addr); VM_BUG_ON(!pmd); - /* - * Mapping in huge pages should only happen through a fault. If a - * page is merged into a transparent huge page, the individual - * subpages of that huge page should be unmapped through MMU - * notifiers before we get here. - * - * Merging of CompoundPages is not supported; they should become - * splitting first, unmapped, merged, and mapped back in on-demand. - */ - VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); - old_pmd = *pmd; if (pmd_present(old_pmd)) { + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ + VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- GitLab From 4a06fdf2c490666e7e8d2a3c17adfff3f0354441 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:51 +0100 Subject: [PATCH 1134/1291] KVM: arm/arm64: Skip updating PTE entry if no change commit 976d34e2dab10ece5ea8fe7090b7692913f89084 upstream. When there is contention on faulting in a particular page table entry at stage 2, the break-before-make requirement of the architecture can lead to additional refaulting due to TLB invalidation. Avoid this by skipping a page table update if the new value of the PTE matches the previous value. Cc: stable@vger.kernel.org Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index d88f1e3fa52c..ec275b8472a9 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -985,6 +985,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; if (pte_present(old_pte)) { + /* Skip page table update if there is no change */ + if (pte_val(old_pte) == pte_val(*new_pte)) + return 0; + kvm_set_pte(pte, __pte(0)); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- GitLab From 97f76f3bc4062aebeae14607daff015643d0ac4a Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 16 Jul 2018 10:38:57 +0200 Subject: [PATCH 1135/1291] s390/kvm: fix deadlock when killed by oom commit 306d6c49ac9ded11114cb53b0925da52f2c2ada1 upstream. When the oom killer kills a userspace process in the page fault handler while in guest context, the fault handler fails to release the mm_sem if the FAULT_FLAG_RETRY_NOWAIT option is set. This leads to a deadlock when tearing down the mm when the process terminates. This bug can only happen when pfault is enabled, so only KVM clients are affected. The problem arises in the rare cases in which handle_mm_fault does not release the mm_sem. This patch fixes the issue by manually releasing the mm_sem when needed. Fixes: 24eb3a824c4f3 ("KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault") Cc: # 3.15+ Signed-off-by: Claudio Imbrenda Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 242b78c0a9ec..40f1888bc4ab 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -486,6 +486,8 @@ static inline int do_exception(struct pt_regs *regs, int access) /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) -- GitLab From e72107b2d995022a1fc95a33050410cf315df0fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Jul 2018 13:21:40 +0200 Subject: [PATCH 1136/1291] stop_machine: Reflow cpu_stop_queue_two_works() commit b80a2bfce85e1051056d98d04ecb2d0b55cbbc1c upstream. The code flow in cpu_stop_queue_two_works() is a little arcane; fix this by lifting the preempt_disable() to the top to create more natural nesting wrt the spinlocks and make the wake_up_q() and preempt_enable() unconditional at the end. Furthermore, enable preemption in the -EDEADLK case, such that we spin-wait with preemption enabled. Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: isaacm@codeaurora.org Cc: matt@codeblueprint.co.uk Cc: psodagud@codeaurora.org Cc: gregkh@linuxfoundation.org Cc: pkondeti@codeaurora.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180730112140.GH2494@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- kernel/stop_machine.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index e190d1ef3a23..34b6652e8677 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -236,13 +236,24 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); DEFINE_WAKE_Q(wakeq); int err; + retry: + /* + * The waking up of stopper threads has to happen in the same + * scheduling context as the queueing. Otherwise, there is a + * possibility of one of the above stoppers being woken up by another + * CPU, and preempting us. This will cause us to not wake up the other + * stopper forever. + */ + preempt_disable(); raw_spin_lock_irq(&stopper1->lock); raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); - err = -ENOENT; - if (!stopper1->enabled || !stopper2->enabled) + if (!stopper1->enabled || !stopper2->enabled) { + err = -ENOENT; goto unlock; + } + /* * Ensure that if we race with __stop_cpus() the stoppers won't get * queued up in reverse order leading to system deadlock. @@ -253,36 +264,30 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, * It can be falsely true but it is safe to spin until it is cleared, * queue_stop_cpus_work() does everything under preempt_disable(). */ - err = -EDEADLK; - if (unlikely(stop_cpus_in_progress)) - goto unlock; + if (unlikely(stop_cpus_in_progress)) { + err = -EDEADLK; + goto unlock; + } err = 0; __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); - /* - * The waking up of stopper threads has to happen - * in the same scheduling context as the queueing. - * Otherwise, there is a possibility of one of the - * above stoppers being woken up by another CPU, - * and preempting us. This will cause us to n ot - * wake up the other stopper forever. - */ - preempt_disable(); + unlock: raw_spin_unlock(&stopper2->lock); raw_spin_unlock_irq(&stopper1->lock); if (unlikely(err == -EDEADLK)) { + preempt_enable(); + while (stop_cpus_in_progress) cpu_relax(); + goto retry; } - if (!err) { - wake_up_q(&wakeq); - preempt_enable(); - } + wake_up_q(&wakeq); + preempt_enable(); return err; } -- GitLab From bd0f93a630ff3f47daf7e6258ac08d731ebb6393 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Fri, 3 Aug 2018 13:56:06 -0700 Subject: [PATCH 1137/1291] stop_machine: Atomically queue and wake stopper threads commit cfd355145c32bb7ccb65fccbe2d67280dc2119e1 upstream. When cpu_stop_queue_work() releases the lock for the stopper thread that was queued into its wake queue, preemption is enabled, which leads to the following deadlock: CPU0 CPU1 sched_setaffinity(0, ...) __set_cpus_allowed_ptr() stop_one_cpu(0, ...) stop_two_cpus(0, 1, ...) cpu_stop_queue_work(0, ...) cpu_stop_queue_two_works(0, ..., 1, ...) -grabs lock for migration/0- -spins with preemption disabled, waiting for migration/0's lock to be released- -adds work items for migration/0 and queues migration/0 to its wake_q- -releases lock for migration/0 and preemption is enabled- -current thread is preempted, and __set_cpus_allowed_ptr has changed the thread's cpu allowed mask to CPU1 only- -acquires migration/0 and migration/1's locks- -adds work for migration/0 but does not add migration/0 to wake_q, since it is already in a wake_q- -adds work for migration/1 and adds migration/1 to its wake_q- -releases migration/0 and migration/1's locks, wakes migration/1, and enables preemption- -since migration/1 is requested to run, migration/1 begins to run and waits on migration/0, but migration/0 will never be able to run, since the thread that can wake it is affine to CPU1- Disable preemption in cpu_stop_queue_work() before queueing works for stopper threads, and queueing the stopper thread in the wake queue, to ensure that the operation of queueing the works and waking the stopper threads is atomic. Fixes: 0b26351b910f ("stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock") Signed-off-by: Prasad Sodagudi Signed-off-by: Isaac J. Manjarres Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: matt@codeblueprint.co.uk Cc: bigeasy@linutronix.de Cc: gregkh@linuxfoundation.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1533329766-4856-1-git-send-email-isaacm@codeaurora.org Signed-off-by: Greg Kroah-Hartman Co-Developed-by: Isaac J. Manjarres --- kernel/stop_machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 34b6652e8677..067cb83f37ea 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -81,6 +81,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) unsigned long flags; bool enabled; + preempt_disable(); raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) @@ -90,6 +91,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) raw_spin_unlock_irqrestore(&stopper->lock, flags); wake_up_q(&wakeq); + preempt_enable(); return enabled; } -- GitLab From 7773a6d94896d168f3430e541f31f85ce0e9a8f4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Aug 2018 12:36:52 -0400 Subject: [PATCH 1138/1291] ext4: check for NUL characters in extended attribute's name commit 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 upstream. Extended attribute names are defined to be NUL-terminated, so the name must not contain a NUL character. This is important because there are places when remove extended attribute, the code uses strlen to determine the length of the entry. That should probably be fixed at some point, but code is currently really messy, so the simplest fix for now is to simply validate that the extended attributes are sane. https://bugzilla.kernel.org/show_bug.cgi?id=200401 Reported-by: Wen Xu Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c7c8c16ccd93..9bc50eef6127 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -189,6 +189,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EFSCORRUPTED; + if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) + return -EFSCORRUPTED; e = next; } -- GitLab From 5043e05dd571ebdc22ff39724ff1da7de2945d49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:48:00 -0400 Subject: [PATCH 1139/1291] ext4: sysfs: print ext4_super_block fields as little-endian commit a4d2aadca184ece182418950d45ba4ffc7b652d2 upstream. While working on extended rand for last_error/first_error timestamps, I noticed that the endianess is wrong; we access the little-endian fields in struct ext4_super_block as native-endian when we print them. This adds a special case in ext4_attr_show() and ext4_attr_store() to byteswap the superblock fields if needed. In older kernels, this code was part of super.c, it got moved to sysfs.c in linux-4.4. Cc: stable@vger.kernel.org Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors") Reviewed-by: Andreas Dilger Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/sysfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index e21afd52e7d7..bdfc2a2de8f2 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -278,8 +278,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", - *((unsigned int *) ptr)); + if (a->attr_ptr == ptr_ext4_super_block_offset) + return snprintf(buf, PAGE_SIZE, "%u\n", + le32_to_cpup(ptr)); + else + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -312,7 +316,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; - *((unsigned int *) ptr) = t; + if (a->attr_ptr == ptr_ext4_super_block_offset) + *((__le32 *) ptr) = cpu_to_le32(t); + else + *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: return inode_readahead_blks_store(a, sbi, buf, len); -- GitLab From 509c0cdfb438e5cc887e75ba42477b499d177164 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 29 Jul 2018 17:13:42 -0400 Subject: [PATCH 1140/1291] ext4: reset error code in ext4_find_entry in fallback commit f39b3f45dbcb0343822cce31ea7636ad66e60bc2 upstream. When ext4_find_entry() falls back to "searching the old fashioned way" due to a corrupt dx dir, it needs to reset the error code to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned to userspace. https://bugzilla.kernel.org/show_bug.cgi?id=199947 Reported-by: Anatoly Trosinenko Reviewed-by: Andreas Dilger Signed-off-by: Eric Sandeen Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6747861f9b70..1db39e12e02b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1397,6 +1397,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); + ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (!nblocks) { -- GitLab From 0c9bed3698891c256b8a83d0c5001286bb87699b Mon Sep 17 00:00:00 2001 From: Michal Wnukowski Date: Wed, 15 Aug 2018 15:51:57 -0700 Subject: [PATCH 1141/1291] nvme-pci: add a memory barrier to nvme_dbbuf_update_and_check_event commit f1ed3df20d2d223e0852cc4ac1f19bba869a7e3c upstream. In many architectures loads may be reordered with older stores to different locations. In the nvme driver the following two operations could be reordered: - Write shadow doorbell (dbbuf_db) into memory. - Read EventIdx (dbbuf_ei) from memory. This can result in a potential race condition between driver and VM host processing requests (if given virtual NVMe controller has a support for shadow doorbell). If that occurs, then the NVMe controller may decide to wait for MMIO doorbell from guest operating system, and guest driver may decide not to issue MMIO doorbell on any of subsequent commands. This issue is purely timing-dependent one, so there is no easy way to reproduce it. Currently the easiest known approach is to run "Oracle IO Numbers" (orion) that is shipped with Oracle DB: orion -run advanced -num_large 0 -size_small 8 -type rand -simulate \ concat -write 40 -duration 120 -matrix row -testname nvme_test Where nvme_test is a .lun file that contains a list of NVMe block devices to run test against. Limiting number of vCPUs assigned to given VM instance seems to increase chances for this bug to occur. On test environment with VM that got 4 NVMe drives and 1 vCPU assigned the virtual NVMe controller hang could be observed within 10-20 minutes. That correspond to about 400-500k IO operations processed (or about 100GB of IO read/writes). Orion tool was used as a validation and set to run in a loop for 36 hours (equivalent of pushing 550M IO operations). No issues were observed. That suggest that the patch fixes the issue. Fixes: f9f38e33389c ("nvme: improve performance for virtual NVMe devices") Signed-off-by: Michal Wnukowski Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg [hch: updated changelog and comment a bit] Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a67d03716510..afb99876fa9e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -306,6 +306,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, old_value = *dbbuf_db; *dbbuf_db = value; + /* + * Ensure that the doorbell is updated before reading the event + * index from memory. The controller needs to provide similar + * ordering to ensure the envent index is updated before reading + * the doorbell. + */ + mb(); + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) return false; } -- GitLab From 7d91aa5717dbf852ef9d36a2c85f2900696fdd13 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Jun 2018 10:59:17 +0200 Subject: [PATCH 1142/1291] platform/x86: ideapad-laptop: Apply no_hw_rfkill to Y20-15IKBM, too commit 58e73aa177850babb947555257fd4f79e5275cf1 upstream. The commit 5d9f40b56630 ("platform/x86: ideapad-laptop: Add Y520-15IKBN to no_hw_rfkill") added the entry for Y20-15IKBN, and it turned out that another variant, Y20-15IKBM, also requires the no_hw_rfkill. Trim the last letter from the string so that it matches to both Y20-15IKBN and Y20-15IKBM models. Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1098626 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index fe98d4ac0df3..e1e7e587b45b 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1097,10 +1097,10 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { }, }, { - .ident = "Lenovo Legion Y520-15IKBN", + .ident = "Lenovo Legion Y520-15IKB", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Y520-15IKBN"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Y520-15IKB"), }, }, { -- GitLab From 3e0994616d4ab790efd574c7f969cdc27491da88 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 23 Aug 2018 18:47:08 +1000 Subject: [PATCH 1143/1291] mm: move tlb_table_flush to tlb_flush_mmu_free commit db7ddef301128dad394f1c0f77027f86ee9a4edb upstream. There is no need to call this from tlb_flush_mmu_tlbonly, it logically belongs with tlb_flush_mmu_free. This makes future fixes simpler. [ This was originally done to allow code consolidation for the mmu_notifier fix, but it also ends up helping simplify the HAVE_RCU_TABLE_INVALIDATE fix. - Linus ] Signed-off-by: Nicholas Piggin Acked-by: Will Deacon Cc: Peter Zijlstra Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 72e1dfa84819..d3528c202679 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -246,9 +246,6 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) tlb_flush(tlb); mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif __tlb_reset_range(tlb); } @@ -256,6 +253,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { free_pages_and_swap_cache(batch->pages, batch->nr); batch->nr = 0; -- GitLab From e9afa7c1ef17708e9dd6714c151aab81be4a5f68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Aug 2018 17:30:15 +0200 Subject: [PATCH 1144/1291] mm/tlb, x86/mm: Support invalidating TLB caches for RCU_TABLE_FREE commit d86564a2f085b79ec046a5cba90188e612352806 upstream. Jann reported that x86 was missing required TLB invalidates when he hit the !*batch slow path in tlb_remove_table(). This is indeed the case; RCU_TABLE_FREE does not provide TLB (cache) invalidates, the PowerPC-hash where this code originated and the Sparc-hash where this was subsequently used did not need that. ARM which later used this put an explicit TLB invalidate in their __p*_free_tlb() functions, and PowerPC-radix followed that example. But when we hooked up x86 we failed to consider this. Fix this by (optionally) hooking tlb_remove_table() into the TLB invalidate code. NOTE: s390 was also needing something like this and might now be able to use the generic code again. [ Modified to be on top of Nick's cleanups, which simplified this patch now that tlb_flush_mmu_tlbonly() really only flushes the TLB - Linus ] Fixes: 9e52fc2b50de ("x86/mm: Enable RCU based page table freeing (CONFIG_HAVE_RCU_TABLE_FREE=y)") Reported-by: Jann Horn Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rik van Riel Cc: Nicholas Piggin Cc: David Miller Cc: Will Deacon Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + mm/memory.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 4e01862f58e4..40dc31fea90c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -336,6 +336,9 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_RCU_TABLE_FREE bool +config HAVE_RCU_TABLE_INVALIDATE + bool + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1c63a4b5320d..2af0af33362a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -170,6 +170,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION select HAVE_STACK_VALIDATION if X86_64 diff --git a/mm/memory.c b/mm/memory.c index d3528c202679..c9657f013a4d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -331,6 +331,21 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ * See the comment near struct mmu_table_batch. */ +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + static void tlb_remove_table_smp_sync(void *arg) { /* Simply deliver the interrupt */ @@ -367,6 +382,7 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { + tlb_table_invalidate(tlb); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -388,11 +404,13 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) if (*batch == NULL) { *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { + tlb_table_invalidate(tlb); tlb_remove_table_one(table); return; } (*batch)->nr = 0; } + (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) tlb_table_flush(tlb); -- GitLab From 7418d70862178363f4ad39ee645e3530c8ff9a8b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 20 Aug 2018 11:58:35 +0200 Subject: [PATCH 1145/1291] x86/speculation/l1tf: Fix overflow in l1tf_pfn_limit() on 32bit commit 9df9516940a61d29aedf4d91b483ca6597e7d480 upstream. On 32bit PAE kernels on 64bit hardware with enough physical bits, l1tf_pfn_limit() will overflow unsigned long. This in turn affects max_swapfile_size() and can lead to swapon returning -EINVAL. This has been observed in a 32bit guest with 42 bits physical address size, where max_swapfile_size() overflows exactly to 1 << 32, thus zero, and produces the following warning to dmesg: [ 6.396845] Truncating oversized swap area, only using 0k out of 2047996k Fix this by using unsigned long long instead. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Reported-by: Dominique Leuenberger Reported-by: Adrian Schroeter Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Acked-by: Andi Kleen Acked-by: Michal Hocko Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Dave Hansen Cc: Michal Hocko Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180820095835.5298-1-vbabka@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/mm/init.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0e856c0628b3..7ed8b756a140 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -180,9 +180,9 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); -static inline unsigned long l1tf_pfn_limit(void) +static inline unsigned long long l1tf_pfn_limit(void) { - return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; + return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; } extern void early_cpu_init(void); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 37f60dfd7e4e..48be2cfedf76 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -892,7 +892,7 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - unsigned long l1tf_limit = l1tf_pfn_limit() + 1; + unsigned long long l1tf_limit = l1tf_pfn_limit() + 1; /* * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. @@ -900,7 +900,7 @@ unsigned long max_swapfile_size(void) #if CONFIG_PGTABLE_LEVELS > 2 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; #endif - pages = min_t(unsigned long, l1tf_limit, pages); + pages = min_t(unsigned long long, l1tf_limit, pages); } return pages; } -- GitLab From 59463ec29cacd63844e27fbefd34847fbfead956 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Aug 2018 15:44:18 +0200 Subject: [PATCH 1146/1291] x86/speculation/l1tf: Fix off-by-one error when warning that system has too much RAM commit b0a182f875689647b014bc01d36b340217792852 upstream. Two users have reported [1] that they have an "extremely unlikely" system with more than MAX_PA/2 memory and L1TF mitigation is not effective. In fact it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to holes in the e820 map, the main region is almost 500MB over the 32GB limit: [ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable Suggestions to use 'mem=32G' to enable the L1TF mitigation while losing the 500MB revealed, that there's an off-by-one error in the check in l1tf_select_mitigation(). l1tf_pfn_limit() returns the last usable pfn (inclusive) and the range check in the mitigation path does not take this into account. Instead of amending the range check, make l1tf_pfn_limit() return the first PFN which is over the limit which is less error prone. Adjust the other users accordingly. [1] https://bugzilla.suse.com/show_bug.cgi?id=1105536 Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Reported-by: George Anchev Reported-by: Christopher Snowhill Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Andi Kleen Cc: Dave Hansen Cc: Michal Hocko Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180823134418.17008-1-vbabka@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- arch/x86/mm/init.c | 2 +- arch/x86/mm/mmap.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7ed8b756a140..b222a38a59ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -182,7 +182,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { - return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; + return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 48be2cfedf76..94b8d90830d1 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -892,7 +892,7 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - unsigned long long l1tf_limit = l1tf_pfn_limit() + 1; + unsigned long long l1tf_limit = l1tf_pfn_limit(); /* * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 5f4805d69aab..53f1c18b15bd 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -191,7 +191,7 @@ bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) /* If it's real memory always allow */ if (pfn_valid(pfn)) return true; - if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) + if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) return false; return true; } -- GitLab From 310f2a6e3ad39a9349c82d575e92d906fbad37c1 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Aug 2018 16:21:29 +0200 Subject: [PATCH 1147/1291] x86/speculation/l1tf: Suggest what to do on systems with too much RAM commit 6a012288d6906fee1dbc244050ade1dafe4a9c8d upstream. Two users have reported [1] that they have an "extremely unlikely" system with more than MAX_PA/2 memory and L1TF mitigation is not effective. Make the warning more helpful by suggesting the proper mem=X kernel boot parameter to make it effective and a link to the L1TF document to help decide if the mitigation is worth the unusable RAM. [1] https://bugzilla.suse.com/show_bug.cgi?id=1105536 Suggested-by: Michal Hocko Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Andi Kleen Cc: Dave Hansen Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/966571f0-9d7f-43dc-92c6-a10eec7a1254@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d07addb99b71..1fb88ee56c8a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -686,6 +686,10 @@ static void __init l1tf_select_mitigation(void) half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", + half_pa); + pr_info("However, doing so will make a part of your RAM unusable.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); return; } -- GitLab From 4587db4c2a95db842d05313d37a332ce210f5bb7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Aug 2018 12:41:15 -0700 Subject: [PATCH 1148/1291] x86/vdso: Fix vDSO build if a retpoline is emitted commit 2e549b2ee0e358bc758480e716b881f9cabedb6a upstream. Currently, if the vDSO ends up containing an indirect branch or call, GCC will emit the "external thunk" style of retpoline, and it will fail to link. Fix it by building the vDSO with inline retpoline thunks. I haven't seen any reports of this triggering on an unpatched kernel. Fixes: commit 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Matt Rickard Cc: Borislav Petkov Cc: Jason Vas Dias Cc: David Woodhouse Cc: Peter Zijlstra Cc: Andi Kleen Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/c76538cd3afbe19c6246c2d1715bc6a60bd63985.1534448381.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 ++++ arch/x86/entry/vdso/Makefile | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4dad2d1c24ba..93faeeece331 100644 --- a/Makefile +++ b/Makefile @@ -490,9 +490,13 @@ KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register +RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk +RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) +RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG))) export RETPOLINE_CFLAGS +export RETPOLINE_VDSO_CFLAGS ifeq ($(config-targets),1) # =========================================================================== diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c366c0adeb40..b545bf9d2328 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -74,9 +74,9 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) -$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. @@ -147,11 +147,13 @@ KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ -- GitLab From cedb8037f0697a8c1b206ca7b02150182adb049b Mon Sep 17 00:00:00 2001 From: Rian Hunter Date: Sun, 19 Aug 2018 16:08:53 -0700 Subject: [PATCH 1149/1291] x86/process: Re-export start_thread() commit dc76803e57cc86589c4efcb5362918f9b0c0436f upstream. The consolidation of the start_thread() functions removed the export unintentionally. This breaks binfmt handlers built as a module. Add it back. Fixes: e634d8fc792c ("x86-64: merge the standard and compat start_thread() functions") Signed-off-by: Rian Hunter Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Vitaly Kuznetsov Cc: Joerg Roedel Cc: Dmitry Safonov Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180819230854.7275-1-rian@alum.mit.edu Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index fa093b77689f..cbeecfcc66d6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -370,6 +370,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) start_thread_common(regs, new_ip, new_sp, __USER_CS, __USER_DS, 0); } +EXPORT_SYMBOL_GPL(start_thread); #ifdef CONFIG_COMPAT void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) -- GitLab From 933e1ab12051d00b6502732438c796a95f7db0d3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 12 Aug 2018 20:41:45 +0200 Subject: [PATCH 1150/1291] KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 024d83cadc6b2af027e473720f3c3da97496c318 upstream. Mikhail reported the following lockdep splat: WARNING: possible irq lock inversion dependency detected CPU 0/KVM/10284 just changed the state of lock: 000000000d538a88 (&st->lock){+...}, at: speculative_store_bypass_update+0x10b/0x170 but this lock was taken by another, HARDIRQ-safe lock in the past: (&(&sighand->siglock)->rlock){-.-.} and interrupts could create inverse lock ordering between them. Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&st->lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&st->lock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** The code path which connects those locks is: speculative_store_bypass_update() ssb_prctl_set() do_seccomp() do_syscall_64() In svm_vcpu_run() speculative_store_bypass_update() is called with interupts enabled via x86_virt_spec_ctrl_set_guest/host(). This is actually a false positive, because GIF=0 so interrupts are disabled even if IF=1; however, we can easily move the invocations of x86_virt_spec_ctrl_set_guest/host() into the interrupt disabled region to cure it, and it's a good idea to keep the GIF=0/IF=1 area as small and self-contained as possible. Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") Reported-by: Mikhail Gavrilov Signed-off-by: Thomas Gleixner Tested-by: Mikhail Gavrilov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Matthew Wilcox Cc: Borislav Petkov Cc: Konrad Rzeszutek Wilk Cc: Tom Lendacky Cc: kvm@vger.kernel.org Cc: x86@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 282bbcbf3b6a..f6bebcec60b4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5067,8 +5067,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); - local_irq_enable(); - /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. Since vmentry is serialising on affected CPUs, there @@ -5077,6 +5075,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + local_irq_enable(); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5199,12 +5199,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - reload_tss(vcpu); local_irq_disable(); + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; -- GitLab From c49505f6efb191287c3b3d8fa6e8eb43ec4b20df Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Aug 2018 12:32:19 -0500 Subject: [PATCH 1151/1291] x86/kvm/vmx: Remove duplicate l1d flush definitions commit 94d7a86c21a3d6046bf4616272313cb7d525075a upstream. These are already defined higher up in the file. Fixes: 7db92e165ac8 ("x86/kvm: Move l1tf setup function") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Paolo Bonzini Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/d7ca03ae210d07173452aeed85ffe344301219a5.1534253536.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 31bb200eb00c..8958b35f6008 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9170,9 +9170,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) * information but as all relevant affected CPUs have 32KiB L1D cache size * there is no point in doing so. */ -#define L1D_CACHE_ORDER 4 -static void *vmx_l1d_flush_pages; - static void vmx_l1d_flush(struct kvm_vcpu *vcpu) { int size = PAGE_SIZE << L1D_CACHE_ORDER; -- GitLab From eaebcf902ae0260b9f9b5bacdc4530c164ca114f Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 Jul 2018 19:00:33 +0300 Subject: [PATCH 1152/1291] fuse: Don't access pipe->buffers without pipe_lock() commit a2477b0e67c52f4364a47c3ad70902bc2a61bd4c upstream. fuse_dev_splice_write() reads pipe->buffers to determine the size of 'bufs' array before taking the pipe_lock(). This is not safe as another thread might change the 'pipe->buffers' between the allocation and taking the pipe_lock(). So we end up with too small 'bufs' array. Move the bufs allocations inside pipe_lock()/pipe_unlock() to fix this. Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device") Signed-off-by: Andrey Ryabinin Cc: # v2.6.35 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 261fd13a75c6..f3f6cd3c2fb5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1941,11 +1941,14 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (!fud) return -EPERM; + pipe_lock(pipe); + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); - if (!bufs) + if (!bufs) { + pipe_unlock(pipe); return -ENOMEM; + } - pipe_lock(pipe); nbuf = 0; rem = 0; for (idx = 0; idx < pipe->nrbufs && rem < len; idx++) -- GitLab From 7d392674443cb561b2b9161d47198d3428b49d7c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 1153/1291] fuse: fix initial parallel dirops commit 63576c13bd17848376c8ba4a98f5d5151140c4ac upstream. If parallel dirops are enabled in FUSE_INIT reply, then first operation may leave fi->mutex held. Reported-by: syzbot Fixes: 5c672ab3f0ee ("fuse: serialize dirops by default") Cc: # v4.7 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 10 ++++++---- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 14 ++++++++++---- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 7a980b4462d9..29868c35c19a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -355,11 +355,12 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct inode *inode; struct dentry *newent; bool outarg_valid = true; + bool locked; - fuse_lock_inode(dir); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); - fuse_unlock_inode(dir); + fuse_unlock_inode(dir, locked); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -1332,6 +1333,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; u64 attr_version = 0; + bool locked; if (is_bad_inode(inode)) return -EIO; @@ -1359,9 +1361,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } - fuse_lock_inode(inode); + locked = fuse_lock_inode(inode); fuse_request_send(fc, req); - fuse_unlock_inode(inode); + fuse_unlock_inode(inode, locked); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d5773ca67ad2..d71ef3b8354d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -964,8 +964,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); -void fuse_unlock_inode(struct inode *inode); -void fuse_lock_inode(struct inode *inode); +void fuse_unlock_inode(struct inode *inode, bool locked); +bool fuse_lock_inode(struct inode *inode); int fuse_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a13ecefa9cd1..25fdf220d2e9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -357,15 +357,21 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } -void fuse_lock_inode(struct inode *inode) +bool fuse_lock_inode(struct inode *inode) { - if (!get_fuse_conn(inode)->parallel_dirops) + bool locked = false; + + if (!get_fuse_conn(inode)->parallel_dirops) { mutex_lock(&get_fuse_inode(inode)->mutex); + locked = true; + } + + return locked; } -void fuse_unlock_inode(struct inode *inode) +void fuse_unlock_inode(struct inode *inode, bool locked) { - if (!get_fuse_conn(inode)->parallel_dirops) + if (locked) mutex_unlock(&get_fuse_inode(inode)->mutex); } -- GitLab From cfb6eca6e4bbacf92630d551a108645163a8f80d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 1154/1291] fuse: fix double request_end() commit 87114373ea507895a62afb10d2910bd9adac35a8 upstream. Refcounting of request is broken when fuse_abort_conn() is called and request is on the fpq->io list: - ref is taken too late - then it is not dropped Fixes: 0d8e84b0432b ("fuse: simplify request abort") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f3f6cd3c2fb5..3a402a96706b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -364,7 +364,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) - return; + goto out_put_req; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); @@ -393,6 +393,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (req->end) req->end(fc, req); +out_put_req: fuse_put_request(fc, req); } @@ -2103,6 +2104,7 @@ void fuse_abort_conn(struct fuse_conn *fc) set_bit(FR_ABORTED, &req->flags); if (!test_bit(FR_LOCKED, &req->flags)) { set_bit(FR_PRIVATE, &req->flags); + __fuse_get_request(req); list_move(&req->list, &to_end1); } spin_unlock(&req->waitq.lock); @@ -2129,7 +2131,6 @@ void fuse_abort_conn(struct fuse_conn *fc) while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); - __fuse_get_request(req); list_del_init(&req->list); request_end(fc, req); } -- GitLab From fc17d7519e8e692ad270a7b73cdd6e6740c54493 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 1155/1291] fuse: fix unlocked access to processing queue commit 45ff350bbd9d0f0977ff270a0d427c71520c0c37 upstream. fuse_dev_release() assumes that it's the only one referencing the fpq->processing list, but that's not true, since fuse_abort_conn() can be doing the same without any serialization between the two. Fixes: c3696046beb3 ("fuse: separate pqueue for clones") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3a402a96706b..482355e8b313 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2148,9 +2148,15 @@ int fuse_dev_release(struct inode *inode, struct file *file) if (fud) { struct fuse_conn *fc = fud->fc; struct fuse_pqueue *fpq = &fud->pq; + LIST_HEAD(to_end); + spin_lock(&fpq->lock); WARN_ON(!list_empty(&fpq->io)); - end_requests(fc, &fpq->processing); + list_splice_init(&fpq->processing, &to_end); + spin_unlock(&fpq->lock); + + end_requests(fc, &to_end); + /* Are we the last open device? */ if (atomic_dec_and_test(&fc->dev_count)) { WARN_ON(fc->iq.fasync != NULL); -- GitLab From 9732069238128212a2c2b9b449a88482e627157b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 1156/1291] fuse: umount should wait for all requests commit b8f95e5d13f5f0191dcb4b9113113d241636e7cb upstream. fuse_abort_conn() does not guarantee that all async requests have actually finished aborting (i.e. their ->end() function is called). This could actually result in still used inodes after umount. Add a helper to wait until all requests are fully done. This is done by looking at the "num_waiting" counter. When this counter drops to zero, we can be sure that no more requests are outstanding. Fixes: 0d8e84b0432b ("fuse: simplify request abort") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 23 +++++++++++++++++++---- fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 482355e8b313..ee8105af4001 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -131,6 +131,16 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) return !fc->initialized || (for_background && fc->blocked); } +static void fuse_drop_waiting(struct fuse_conn *fc) +{ + if (fc->connected) { + atomic_dec(&fc->num_waiting); + } else if (atomic_dec_and_test(&fc->num_waiting)) { + /* wake up aborters */ + wake_up_all(&fc->blocked_waitq); + } +} + static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, bool for_background) { @@ -171,7 +181,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, return req; out: - atomic_dec(&fc->num_waiting); + fuse_drop_waiting(fc); return ERR_PTR(err); } @@ -278,7 +288,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) if (test_bit(FR_WAITING, &req->flags)) { __clear_bit(FR_WAITING, &req->flags); - atomic_dec(&fc->num_waiting); + fuse_drop_waiting(fc); } if (req->stolen_file) @@ -364,7 +374,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) - goto out_put_req; + goto put_request; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); @@ -393,7 +403,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (req->end) req->end(fc, req); -out_put_req: +put_request: fuse_put_request(fc, req); } @@ -2141,6 +2151,11 @@ void fuse_abort_conn(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_abort_conn); +void fuse_wait_aborted(struct fuse_conn *fc) +{ + wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); +} + int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = fuse_get_dev(file); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d71ef3b8354d..e105640153ce 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -852,6 +852,7 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); +void fuse_wait_aborted(struct fuse_conn *fc); /** * Invalidate inode attributes diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 25fdf220d2e9..b6c0298a4f62 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -400,6 +400,8 @@ static void fuse_put_super(struct super_block *sb) fuse_send_destroy(fc); fuse_abort_conn(fc); + fuse_wait_aborted(fc); + mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); -- GitLab From ff4a71855d0a4538f705894679ae0754374549e1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 1157/1291] fuse: Fix oops at process_init_reply() commit e8f3bd773d22f488724dffb886a1618da85c2966 upstream. syzbot is hitting NULL pointer dereference at process_init_reply(). This is because deactivate_locked_super() is called before response for initial request is processed. Fix this by aborting and waiting for all requests (including FUSE_INIT) before resetting fc->sb. Original patch by Tetsuo Handa . Reported-by: syzbot Fixes: e27c9d3877a0 ("fuse: fuse: add time_gran to INIT_OUT") Cc: # v3.19 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b6c0298a4f62..ffb61787d77a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -397,11 +397,6 @@ static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); - fuse_send_destroy(fc); - - fuse_abort_conn(fc); - fuse_wait_aborted(fc); - mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); @@ -1198,16 +1193,25 @@ static struct dentry *fuse_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); } -static void fuse_kill_sb_anon(struct super_block *sb) +static void fuse_sb_destroy(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc) { + fuse_send_destroy(fc); + + fuse_abort_conn(fc); + fuse_wait_aborted(fc); + down_write(&fc->killsb); fc->sb = NULL; up_write(&fc->killsb); } +} +static void fuse_kill_sb_anon(struct super_block *sb) +{ + fuse_sb_destroy(sb); kill_anon_super(sb); } @@ -1230,14 +1234,7 @@ static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, static void fuse_kill_sb_blk(struct super_block *sb) { - struct fuse_conn *fc = get_fuse_conn_super(sb); - - if (fc) { - down_write(&fc->killsb); - fc->sb = NULL; - up_write(&fc->killsb); - } - + fuse_sb_destroy(sb); kill_block_super(sb); } -- GitLab From e8a3f3a03655a3b4fb2c40972034a40c77b2842e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 19 Jul 2018 15:49:39 +0300 Subject: [PATCH 1158/1291] fuse: Add missed unlock_page() to fuse_readpages_fill() commit 109728ccc5933151c68d1106e4065478a487a323 upstream. The above error path returns with page unlocked, so this place seems also to behave the same. Fixes: f8dbdf81821b ("fuse: rework fuse_readpages()") Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cb7dff5c45d7..fb4738ef162f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -866,6 +866,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) } if (WARN_ON(req->num_pages >= req->max_pages)) { + unlock_page(page); fuse_put_request(fc, req); return -EIO; } -- GitLab From 29e641a3693a14bfcf144dc245ded989e00025e6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:55 +0200 Subject: [PATCH 1159/1291] udl-kms: change down_interruptible to down commit 8456b99c16d193c4c3b7df305cf431e027f0189c upstream. If we leave urbs around, it causes not only leak, but also memory corruption. This patch fixes the function udl_free_urb_list, so that it always waits for all urbs that are in progress. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 0328b2c7b210..2a1760841530 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -169,18 +169,13 @@ static void udl_free_urb_list(struct drm_device *dev) struct list_head *node; struct urb_node *unode; struct urb *urb; - int ret; unsigned long flags; DRM_DEBUG("Waiting for completes and freeing all render urbs\n"); /* keep waiting and freeing, until we've got 'em all */ while (count--) { - - /* Getting interrupted means a leak, but ok at shutdown*/ - ret = down_interruptible(&udl->urbs.limit_sem); - if (ret) - break; + down(&udl->urbs.limit_sem); spin_lock_irqsave(&udl->urbs.lock, flags); -- GitLab From 86c18c5a4bc5dbd86ddd74da2735188c4b09692a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:56 +0200 Subject: [PATCH 1160/1291] udl-kms: handle allocation failure commit 542bb9788a1f485eb1a2229178f665d8ea166156 upstream. Allocations larger than PAGE_ALLOC_COSTLY_ORDER are unreliable and they may fail anytime. This patch fixes the udl kms driver so that when a large alloactions fails, it tries to do multiple smaller allocations. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_main.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 2a1760841530..f8ea3c99b523 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -199,17 +199,22 @@ static void udl_free_urb_list(struct drm_device *dev) static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) { struct udl_device *udl = dev->dev_private; - int i = 0; struct urb *urb; struct urb_node *unode; char *buf; + size_t wanted_size = count * size; spin_lock_init(&udl->urbs.lock); +retry: udl->urbs.size = size; INIT_LIST_HEAD(&udl->urbs.list); - while (i < count) { + sema_init(&udl->urbs.limit_sem, 0); + udl->urbs.count = 0; + udl->urbs.available = 0; + + while (udl->urbs.count * size < wanted_size) { unode = kzalloc(sizeof(struct urb_node), GFP_KERNEL); if (!unode) break; @@ -225,11 +230,16 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) } unode->urb = urb; - buf = usb_alloc_coherent(udl->udev, MAX_TRANSFER, GFP_KERNEL, + buf = usb_alloc_coherent(udl->udev, size, GFP_KERNEL, &urb->transfer_dma); if (!buf) { kfree(unode); usb_free_urb(urb); + if (size > PAGE_SIZE) { + size /= 2; + udl_free_urb_list(dev); + goto retry; + } break; } @@ -240,16 +250,14 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) list_add_tail(&unode->entry, &udl->urbs.list); - i++; + up(&udl->urbs.limit_sem); + udl->urbs.count++; + udl->urbs.available++; } - sema_init(&udl->urbs.limit_sem, i); - udl->urbs.count = i; - udl->urbs.available = i; - - DRM_DEBUG("allocated %d %d byte urbs\n", i, (int) size); + DRM_DEBUG("allocated %d %d byte urbs\n", udl->urbs.count, (int) size); - return i; + return udl->urbs.count; } struct urb *udl_get_urb(struct drm_device *dev) -- GitLab From c0357c1895aedbcbbd9a2eefbff9504f904161f7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:57 +0200 Subject: [PATCH 1161/1291] udl-kms: fix crash due to uninitialized memory commit 09a00abe3a9941c2715ca83eb88172cd2f54d8fd upstream. We must use kzalloc when allocating the fb_deferred_io structure. Otherwise, the field first_io is undefined and it causes a crash. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index d5583190f3e4..c3f5867dac91 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -221,7 +221,7 @@ static int udl_fb_open(struct fb_info *info, int user) struct fb_deferred_io *fbdefio; - fbdefio = kmalloc(sizeof(struct fb_deferred_io), GFP_KERNEL); + fbdefio = kzalloc(sizeof(struct fb_deferred_io), GFP_KERNEL); if (fbdefio) { fbdefio->delay = DL_DEFIO_WRITE_DELAY; -- GitLab From f337a54878e2c0d662ee7bd04b6daabfdaad176f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:41:00 +0200 Subject: [PATCH 1162/1291] udl-kms: avoid division commit 91ba11fb7d7ca0a3bbe8a512e65e666e2ec1e889 upstream. Division is slow, so it shouldn't be done by the pixel generating code. The driver supports only 2 or 4 bytes per pixel, so we can replace division with a shift. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_drv.h | 2 +- drivers/gpu/drm/udl/udl_fb.c | 15 +++++++----- drivers/gpu/drm/udl/udl_transfer.c | 39 +++++++++++++++--------------- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 2a75ab80527a..2c149b841cf1 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -110,7 +110,7 @@ udl_fb_user_fb_create(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); -int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, +int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, const char *front, char **urb_buf_ptr, u32 byte_offset, u32 device_byte_offset, u32 byte_width, int *ident_ptr, int *sent_ptr); diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index c3f5867dac91..8746eeeec44d 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -90,7 +90,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, int bytes_identical = 0; struct urb *urb; int aligned_x; - int bpp = fb->base.format->cpp[0]; + int log_bpp; + + BUG_ON(!is_power_of_2(fb->base.format->cpp[0])); + log_bpp = __ffs(fb->base.format->cpp[0]); if (!fb->active_16) return 0; @@ -125,12 +128,12 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, for (i = y; i < y + height ; i++) { const int line_offset = fb->base.pitches[0] * i; - const int byte_offset = line_offset + (x * bpp); - const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); - if (udl_render_hline(dev, bpp, &urb, + const int byte_offset = line_offset + (x << log_bpp); + const int dev_byte_offset = (fb->base.width * i + x) << log_bpp; + if (udl_render_hline(dev, log_bpp, &urb, (char *) fb->obj->vmapping, &cmd, byte_offset, dev_byte_offset, - width * bpp, + width << log_bpp, &bytes_identical, &bytes_sent)) goto error; } @@ -149,7 +152,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, error: atomic_add(bytes_sent, &udl->bytes_sent); atomic_add(bytes_identical, &udl->bytes_identical); - atomic_add(width*height*bpp, &udl->bytes_rendered); + atomic_add((width * height) << log_bpp, &udl->bytes_rendered); end_cycles = get_cycles(); atomic_add(((unsigned int) ((end_cycles - start_cycles) >> 10)), /* Kcycles */ diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index b992644c17e6..f3331d33547a 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -83,12 +83,12 @@ static inline u16 pixel32_to_be16(const uint32_t pixel) ((pixel >> 8) & 0xf800)); } -static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) +static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp) { - u16 pixel_val16 = 0; - if (bpp == 2) + u16 pixel_val16; + if (log_bpp == 1) pixel_val16 = *(const uint16_t *)pixel; - else if (bpp == 4) + else pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); return pixel_val16; } @@ -125,8 +125,9 @@ static void udl_compress_hline16( const u8 *const pixel_end, uint32_t *device_address_ptr, uint8_t **command_buffer_ptr, - const uint8_t *const cmd_buffer_end, int bpp) + const uint8_t *const cmd_buffer_end, int log_bpp) { + const int bpp = 1 << log_bpp; const u8 *pixel = *pixel_start_ptr; uint32_t dev_addr = *device_address_ptr; uint8_t *cmd = *command_buffer_ptr; @@ -153,12 +154,12 @@ static void udl_compress_hline16( raw_pixels_count_byte = cmd++; /* we'll know this later */ raw_pixel_start = pixel; - cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, - (unsigned long)(pixel_end - pixel) / bpp, - (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; + cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL, + (unsigned long)(pixel_end - pixel) >> log_bpp, + (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp); prefetch_range((void *) pixel, cmd_pixel_end - pixel); - pixel_val16 = get_pixel_val16(pixel, bpp); + pixel_val16 = get_pixel_val16(pixel, log_bpp); while (pixel < cmd_pixel_end) { const u8 *const start = pixel; @@ -170,7 +171,7 @@ static void udl_compress_hline16( pixel += bpp; while (pixel < cmd_pixel_end) { - pixel_val16 = get_pixel_val16(pixel, bpp); + pixel_val16 = get_pixel_val16(pixel, log_bpp); if (pixel_val16 != repeating_pixel_val16) break; pixel += bpp; @@ -179,10 +180,10 @@ static void udl_compress_hline16( if (unlikely(pixel > start + bpp)) { /* go back and fill in raw pixel count */ *raw_pixels_count_byte = (((start - - raw_pixel_start) / bpp) + 1) & 0xFF; + raw_pixel_start) >> log_bpp) + 1) & 0xFF; /* immediately after raw data is repeat byte */ - *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; + *cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF; /* Then start another raw pixel span */ raw_pixel_start = pixel; @@ -192,14 +193,14 @@ static void udl_compress_hline16( if (pixel > raw_pixel_start) { /* finalize last RAW span */ - *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; + *raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF; } else { /* undo unused byte */ cmd--; } - *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; - dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; + *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF; + dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2; } if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { @@ -222,19 +223,19 @@ static void udl_compress_hline16( * (that we can only write to, slowly, and can never read), and (optionally) * our shadow copy that tracks what's been sent to that hardware buffer. */ -int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, +int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, const char *front, char **urb_buf_ptr, u32 byte_offset, u32 device_byte_offset, u32 byte_width, int *ident_ptr, int *sent_ptr) { const u8 *line_start, *line_end, *next_pixel; - u32 base16 = 0 + (device_byte_offset / bpp) * 2; + u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2; struct urb *urb = *urb_ptr; u8 *cmd = *urb_buf_ptr; u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; - BUG_ON(!(bpp == 2 || bpp == 4)); + BUG_ON(!(log_bpp == 1 || log_bpp == 2)); line_start = (u8 *) (front + byte_offset); next_pixel = line_start; @@ -244,7 +245,7 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, udl_compress_hline16(&next_pixel, line_end, &base16, - (u8 **) &cmd, (u8 *) cmd_end, bpp); + (u8 **) &cmd, (u8 *) cmd_end, log_bpp); if (cmd >= cmd_end) { int len = cmd - (u8 *) urb->transfer_buffer; -- GitLab From f0f3784ee3e944bc0044da45ad1e56580fd137d9 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Tue, 31 Jul 2018 21:14:16 +0200 Subject: [PATCH 1163/1291] b43legacy/leds: Ensure NUL-termination of LED name string commit 4d77a89e3924b12f4a5628b21237e57ab4703866 upstream. strncpy might not NUL-terminate the string, if the name equals the buffer size. Use strlcpy instead. Signed-off-by: Michael Buesch Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43legacy/leds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/leds.c b/drivers/net/wireless/broadcom/b43legacy/leds.c index fd4565389c77..bc922118b6ac 100644 --- a/drivers/net/wireless/broadcom/b43legacy/leds.c +++ b/drivers/net/wireless/broadcom/b43legacy/leds.c @@ -101,7 +101,7 @@ static int b43legacy_register_led(struct b43legacy_wldev *dev, led->dev = dev; led->index = led_index; led->activelow = activelow; - strncpy(led->name, name, sizeof(led->name)); + strlcpy(led->name, name, sizeof(led->name)); led->led_dev.name = led->name; led->led_dev.default_trigger = default_trigger; -- GitLab From e16bbdeccdd7fa073f35c98f2c1d69188984731e Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Tue, 31 Jul 2018 21:14:04 +0200 Subject: [PATCH 1164/1291] b43/leds: Ensure NUL-termination of LED name string commit 2aa650d1950fce94f696ebd7db30b8830c2c946f upstream. strncpy might not NUL-terminate the string, if the name equals the buffer size. Use strlcpy instead. Signed-off-by: Michael Buesch Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43/leds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/leds.c b/drivers/net/wireless/broadcom/b43/leds.c index cb987c2ecc6b..87131f663292 100644 --- a/drivers/net/wireless/broadcom/b43/leds.c +++ b/drivers/net/wireless/broadcom/b43/leds.c @@ -131,7 +131,7 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led, led->wl = dev->wl; led->index = led_index; led->activelow = activelow; - strncpy(led->name, name, sizeof(led->name)); + strlcpy(led->name, name, sizeof(led->name)); atomic_set(&led->state, 0); led->led_dev.name = led->name; -- GitLab From d1913b9e07e7044dc8d98a8998669cfa1b7461fe Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 27 Jun 2018 17:36:38 +0200 Subject: [PATCH 1165/1291] ASoC: dpcm: don't merge format from invalid codec dai commit 4febced15ac8ddb9cf3e603edb111842e4863d9a upstream. When merging codec formats, dpcm_runtime_base_format() should skip the codecs which are not supporting the current stream direction. At the moment, if a BE link has more than one codec, and only one of these codecs has no capture DAI, it becomes impossible to start a capture stream because the merged format would be 0. Skipping invalid codec DAI solves the problem. Fixes: b073ed4e2126 ("ASoC: soc-pcm: DPCM cares BE format") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 3d0dab8282ad..6fc85199ac73 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1607,6 +1607,14 @@ static u64 dpcm_runtime_base_format(struct snd_pcm_substream *substream) int i; for (i = 0; i < be->num_codecs; i++) { + /* + * Skip CODECs which don't support the current stream + * type. See soc_pcm_init_runtime_hw() for more details + */ + if (!snd_soc_dai_stream_valid(be->codec_dais[i], + stream)) + continue; + codec_dai_drv = be->codec_dais[i]->driver; if (stream == SNDRV_PCM_STREAM_PLAYBACK) codec_stream = &codec_dai_drv->playback; -- GitLab From 2ef691428ee3f8cec8e4374cf4cf4a6eae9c0bf0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 22:40:49 +0200 Subject: [PATCH 1166/1291] ASoC: zte: Fix incorrect PCM format bit usages commit c889a45d229938a94b50aadb819def8bb11a6a54 upstream. zx-tdm driver sets the DAI driver definitions with the format bits wrongly set with SNDRV_PCM_FORMAT_*, instead of SNDRV_PCM_FMTBIT_*. This patch corrects the definitions. Spotted by a sparse warning: sound/soc/zte/zx-tdm.c:363:35: warning: restricted snd_pcm_format_t degrades to integer Fixes: 870e0ddc4345 ("ASoC: zx-tdm: add zte's tdm controller driver") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/zte/zx-tdm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/zte/zx-tdm.c b/sound/soc/zte/zx-tdm.c index dc955272f58b..389272eeba9a 100644 --- a/sound/soc/zte/zx-tdm.c +++ b/sound/soc/zte/zx-tdm.c @@ -144,8 +144,8 @@ static void zx_tdm_rx_dma_en(struct zx_tdm_info *tdm, bool on) #define ZX_TDM_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000) #define ZX_TDM_FMTBIT \ - (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_MU_LAW | \ - SNDRV_PCM_FORMAT_A_LAW) + (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_MU_LAW | \ + SNDRV_PCM_FMTBIT_A_LAW) static int zx_tdm_dai_probe(struct snd_soc_dai *dai) { -- GitLab From 7806d2ef49042e5941869363042089768552211f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Jul 2018 15:49:10 -0500 Subject: [PATCH 1167/1291] ASoC: sirf: Fix potential NULL pointer dereference commit ae1c696a480c67c45fb23b35162183f72c6be0e1 upstream. There is a potential execution path in which function platform_get_resource() returns NULL. If this happens, we will end up having a NULL pointer dereference. Fix this by replacing devm_ioremap with devm_ioremap_resource, which has the NULL check and the memory region request. This code was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Fixes: 2bd8d1d5cf89 ("ASoC: sirf: Add audio usp interface driver") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sirf/sirf-usp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/sirf/sirf-usp.c b/sound/soc/sirf/sirf-usp.c index 77e7dcf969d0..d70fcd4a1adf 100644 --- a/sound/soc/sirf/sirf-usp.c +++ b/sound/soc/sirf/sirf-usp.c @@ -370,10 +370,9 @@ static int sirf_usp_pcm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, usp); mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap(&pdev->dev, mem_res->start, - resource_size(mem_res)); - if (base == NULL) - return -ENOMEM; + base = devm_ioremap_resource(&pdev->dev, mem_res); + if (IS_ERR(base)) + return PTR_ERR(base); usp->regmap = devm_regmap_init_mmio(&pdev->dev, base, &sirf_usp_regmap_config); if (IS_ERR(usp->regmap)) -- GitLab From 42228037aa5fd789fbde006b1ff74c8ae5c4f93f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Jul 2018 17:55:15 +0300 Subject: [PATCH 1168/1291] pinctrl: freescale: off by one in imx1_pinconf_group_dbg_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 19da44cd33a3a6ff7c97fff0189999ff15b241e4 upstream. The info->groups[] array is allocated in imx1_pinctrl_parse_dt(). It has info->ngroups elements. Thus the > here should be >= to prevent reading one element beyond the end of the array. Cc: stable@vger.kernel.org Fixes: 30612cd90005 ("pinctrl: imx1 core driver") Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Acked-by: Dong Aisheng Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx1-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index a4e9f430d452..e2cca91fd266 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -433,7 +433,7 @@ static void imx1_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, const char *name; int i, ret; - if (group > info->ngroups) + if (group >= info->ngroups) return; seq_puts(s, "\n"); -- GitLab From fbd5b82d30d2884344f324e3e5709e189fa819c4 Mon Sep 17 00:00:00 2001 From: Samuel Neves Date: Sat, 1 Sep 2018 21:14:52 +0100 Subject: [PATCH 1169/1291] x86/vdso: Fix lsl operand order commit e78e5a91456fcecaa2efbb3706572fe043766f4d upstream. In the __getcpu function, lsl is using the wrong target and destination registers. Luckily, the compiler tends to choose %eax for both variables, so it has been working so far. Fixes: a582c540ac1b ("x86/vdso: Use RDPID in preference to LSL when available") Signed-off-by: Samuel Neves Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180901201452.27828-1-sneves@dei.uc.pt Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vgtod.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 52250681f68c..d92ccff4e615 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void) * * If RDPID is available, use it. */ - alternative_io ("lsl %[p],%[seg]", + alternative_io ("lsl %[seg],%[p]", ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ X86_FEATURE_RDPID, [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); -- GitLab From 53f01e2004ae862f76240c7d92a59fa00a7bfa96 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 29 Aug 2018 08:47:18 -0700 Subject: [PATCH 1170/1291] x86/nmi: Fix NMI uaccess race against CR3 switching commit 4012e77a903d114f915fc607d6d2ed54a3d6c9b1 upstream. A NMI can hit in the middle of context switching or in the middle of switch_mm_irqs_off(). In either case, CR3 might not match current->mm, which could cause copy_from_user_nmi() and friends to read the wrong memory. Fix it by adding a new nmi_uaccess_okay() helper and checking it in copy_from_user_nmi() and in __copy_from_user_nmi()'s callers. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Rik van Riel Cc: Nadav Amit Cc: Borislav Petkov Cc: Jann Horn Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 2 +- arch/x86/include/asm/tlbflush.h | 40 +++++++++++++++++++++++++++++++++ arch/x86/lib/usercopy.c | 5 +++++ arch/x86/mm/tlb.c | 7 ++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 717c9219d00e..e5097dc85a06 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2462,7 +2462,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs perf_callchain_store(entry, regs->ip); - if (!current->mm) + if (!nmi_uaccess_okay()) return; if (perf_callchain_user32(regs, entry)) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 875ca99b82ee..5f00ecb9d251 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,8 +175,16 @@ struct tlb_state { * are on. This means that it may not match current->active_mm, * which will contain the previous user mm when we're in lazy TLB * mode even if we've already switched back to swapper_pg_dir. + * + * During switch_mm_irqs_off(), loaded_mm will be set to + * LOADED_MM_SWITCHING during the brief interrupts-off window + * when CR3 and loaded_mm would otherwise be inconsistent. This + * is for nmi_uaccess_okay()'s benefit. */ struct mm_struct *loaded_mm; + +#define LOADED_MM_SWITCHING ((struct mm_struct *)1) + u16 loaded_mm_asid; u16 next_asid; /* last user mm's ctx id */ @@ -246,6 +254,38 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or + * switching the loaded mm. It can also be dangerous if we + * interrupted some kernel code that was temporarily using a + * different mm. + */ +static inline bool nmi_uaccess_okay(void) +{ + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *current_mm = current->mm; + + VM_WARN_ON_ONCE(!loaded_mm); + + /* + * The condition we want to check is + * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, + * if we're running in a VM with shadow paging, and nmi_uaccess_okay() + * is supposed to be reasonably fast. + * + * Instead, we check the almost equivalent but somewhat conservative + * condition below, and we rely on the fact that switch_mm_irqs_off() + * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + */ + if (loaded_mm != current_mm) + return false; + + VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); + + return true; +} + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index c8c6ad0d58b8..3f435d7fca5e 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include #include +#include + /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. @@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) if (__range_not_ok(from, n, TASK_SIZE)) return n; + if (!nmi_uaccess_okay()) + return n; + /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0c936435ea93..83a3f4c935fc 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -292,6 +292,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); + /* Let nmi_uaccess_okay() know that we're changing CR3. */ + this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); + barrier(); + if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); @@ -322,6 +326,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (next != &init_mm) this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + /* Make sure we write CR3 before loaded_mm. */ + barrier(); + this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); } -- GitLab From f822ceb826080784fa3db2e63a28655befddf6fc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 27 Aug 2018 14:40:09 -0700 Subject: [PATCH 1171/1291] x86/irqflags: Mark native_restore_fl extern inline commit 1f59a4581b5ecfe9b4f049a7a2cf904d8352842d upstream. This should have been marked extern inline in order to pick up the out of line definition in arch/x86/kernel/irqflags.S. Fixes: 208cbb325589 ("x86/irqflags: Provide a declaration for native_save_fl") Reported-by: Ben Hutchings Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: "H. Peter Anvin" Cc: Boris Ostrovsky Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180827214011.55428-1-ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c14f2a74b2be..15450a675031 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void) return flags; } -static inline void native_restore_fl(unsigned long flags) +extern inline void native_restore_fl(unsigned long flags); +extern inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ -- GitLab From f64979512c5e2c4015ed4b2bdf2291b4f294d451 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 24 Aug 2018 10:03:51 -0700 Subject: [PATCH 1172/1291] x86/spectre: Add missing family 6 check to microcode check commit 1ab534e85c93945f7862378d8c8adcf408205b19 upstream. The check for Spectre microcodes does not check for family 6, only the model numbers. Add a family 6 check to avoid ambiguity with other families. Fixes: a5b296636453 ("x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180824170351.34874-2-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 278be092b300..574dcdc092ab 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; + if (c->x86 != 6) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_stepping == spectre_bad_microcodes[i].stepping) -- GitLab From ec4034835eaf9aba4399e3c6770c3e6d6cc09504 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 24 Aug 2018 10:03:50 -0700 Subject: [PATCH 1173/1291] x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+ commit cc51e5428ea54f575d49cfcede1d4cb3a72b4ec4 upstream. On Nehalem and newer core CPUs the CPU cache internally uses 44 bits physical address space. The L1TF workaround is limited by this internal cache address width, and needs to have one bit free there for the mitigation to work. Older client systems report only 36bit physical address space so the range check decides that L1TF is not mitigated for a 36bit phys/32GB system with some memory holes. But since these actually have the larger internal cache width this warning is bogus because it would only really be needed if the system had more than 43bits of memory. Add a new internal x86_cache_bits field. Normally it is the same as the physical bits field reported by CPUID, but for Nehalem and newerforce it to be at least 44bits. Change the L1TF memory size warning to use the new cache_bits field to avoid bogus warnings and remove the bogus comment about memory size. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Reported-by: George Anchev Reported-by: Christopher Snowhill Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: Michael Hocko Cc: vbabka@suse.cz Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180824170351.34874-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 4 ++- arch/x86/kernel/cpu/bugs.c | 46 ++++++++++++++++++++++++++++---- arch/x86/kernel/cpu/common.c | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b222a38a59ed..b12c8d70dd33 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,6 +132,8 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + /* Address space bits used by the cache internally */ + u8 x86_cache_bits; } __randomize_layout; struct cpuid_regs { @@ -182,7 +184,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { - return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT); + return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1fb88ee56c8a..3e435f88621d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -652,6 +652,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation); enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +/* + * These CPUs all support 44bits physical address space internally in the + * cache but CPUID can report a smaller number of physical address bits. + * + * The L1TF mitigation uses the top most address bit for the inversion of + * non present PTEs. When the installed memory reaches into the top most + * address bit due to memory holes, which has been observed on machines + * which report 36bits physical address bits and have 32G RAM installed, + * then the mitigation range check in l1tf_select_mitigation() triggers. + * This is a false positive because the mitigation is still possible due to + * the fact that the cache uses 44bit internally. Use the cache bits + * instead of the reported physical bits and adjust them on the affected + * machines to 44bit if the reported bits are less than 44. + */ +static void override_cache_bits(struct cpuinfo_x86 *c) +{ + if (c->x86 != 6) + return; + + switch (c->x86_model) { + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + if (c->x86_cache_bits < 44) + c->x86_cache_bits = 44; + break; + } +} + static void __init l1tf_select_mitigation(void) { u64 half_pa; @@ -659,6 +698,8 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + override_cache_bits(&boot_cpu_data); + switch (l1tf_mitigation) { case L1TF_MITIGATION_OFF: case L1TF_MITIGATION_FLUSH_NOWARN: @@ -678,11 +719,6 @@ static void __init l1tf_select_mitigation(void) return; #endif - /* - * This is extremely unlikely to happen because almost all - * systems have far more MAX_PA/2 than RAM can be fit into - * DIMM slots. - */ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dd02ee4fa8cd..7d2a7890a823 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -890,6 +890,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) } } #endif + c->x86_cache_bits = c->x86_phys_bits; } static const __initconst struct x86_cpu_id cpu_no_speculation[] = { -- GitLab From cf9fcdd6c2a258b817361967b66b5aba5b7376f2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 15 Aug 2018 08:14:37 -0500 Subject: [PATCH 1174/1291] hwmon: (nct6775) Fix potential Spectre v1 commit d49dbfade96d5b0863ca8a90122a805edd5ef50a upstream. val can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: vers/hwmon/nct6775.c:2698 store_pwm_weight_temp_sel() warn: potential spectre issue 'data->temp_src' [r] Fix this by sanitizing val before using it to index data->temp_src Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 5f87764d7015..ca9941fa741b 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "lm75.h" #define USE_ALTERNATE @@ -2642,6 +2643,7 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr, return err; if (val > NUM_TEMP) return -EINVAL; + val = array_index_nospec(val, NUM_TEMP + 1); if (val && (!(data->have_temp & BIT(val - 1)) || !data->temp_src[val - 1])) return -EINVAL; -- GitLab From bbcbaf56ff4b677974d8bb3dd67a134f61055dd1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 28 Aug 2018 20:40:33 +0200 Subject: [PATCH 1175/1291] x86/entry/64: Wipe KASAN stack shadow before rewind_stack_do_exit() commit f12d11c5c184626b4befdee3d573ec8237405a33 upstream. Reset the KASAN shadow state of the task stack before rewinding RSP. Without this, a kernel oops will leave parts of the stack poisoned, and code running under do_exit() can trip over such poisoned regions and cause nonsensical false-positive KASAN reports about stack-out-of-bounds bugs. This does not wipe the exception stacks; if an oops happens on an exception stack, it might result in random KASAN false-positives from other tasks afterwards. This is probably relatively uninteresting, since if the kernel oopses on an exception stack, there are most likely bigger things to worry about. It'd be more interesting if vmapped stacks and KASAN were compatible, since then handle_stack_overflow() would oops from exception stack context. Fixes: 2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Acked-by: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Kees Cook Cc: kasan-dev@googlegroups.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180828184033.93712-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/dumpstack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index a2d8a3908670..224de37821e4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -298,7 +299,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) * We're not going to return, but we might be on an IST stack or * have very little stack space left. Rewind the stack and kill * the task. + * Before we rewind the stack, we have to tell KASAN that we're going to + * reuse the task stack and that existing poisons are invalid. */ + kasan_unpoison_task_stack(current); rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); -- GitLab From 9fae74e9a44141e8061965505ee8c66881bd6c57 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 7 Aug 2018 18:57:11 +0200 Subject: [PATCH 1176/1291] s390/mm: fix addressing exception after suspend/resume commit 37a366face294facb9c9d9fdd9f5b64a27456cbd upstream. Commit c9b5ad546e7d "s390/mm: tag normal pages vs pages used in page tables" accidentally changed the logic in arch_set_page_states(), which is used by the suspend/resume code. set_page_stable(page, order) was changed to set_page_stable_dat(page, 0). After this, only the first page of higher order pages will be set to stable, and a write to one of the unstable pages will result in an addressing exception. Fix this by using "order" again, instead of "0". Fixes: c9b5ad546e7d ("s390/mm: tag normal pages vs pages used in page tables") Cc: stable@vger.kernel.org # 4.14+ Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/page-states.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 382153ff17e3..dc3cede7f2ec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable_dat(page, 0); + set_page_stable_dat(page, order); else set_page_unused(page, order); } -- GitLab From bcd169a2726a789699d487cc35a606cdf94c7d85 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Aug 2018 14:26:39 +0200 Subject: [PATCH 1177/1291] s390: fix br_r1_trampoline for machines without exrl commit 26f843848bae973817b3587780ce6b7b0200d3e4 upstream. For machines without the exrl instruction the BFP jit generates code that uses an "br %r1" instruction located in the lowcore page. Unfortunately there is a cut & paste error that puts an additional "larl %r1,.+14" instruction in the code that clobbers the branch target address in %r1. Remove the larl instruction. Cc: # v4.17+ Fixes: de5cb6eb51 ("s390: use expoline thunks in the BPF JIT") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 45f1ea117128..6b1474fa99ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -518,8 +518,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct lowcore, br_r1_trampoline)); -- GitLab From 97e3dcc08e4ee3b2edd93514479f98b7e79ac056 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 16 May 2018 09:37:25 +0200 Subject: [PATCH 1178/1291] s390/qdio: reset old sbal_state flags commit 64e03ff72623b8c2ea89ca3cb660094e019ed4ae upstream. When allocating a new AOB fails, handle_outbound() is still capable of transmitting the selected buffer (just without async completion). But if a previous transfer on this queue slot used async completion, its sbal_state flags field is still set to QDIO_OUTBUF_STATE_FLAG_PENDING. So when the upper layer driver sees this stale flag, it expects an async completion that never happens. Fix this by unconditionally clearing the flags field. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Cc: #v3.2+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/qdio.h | 1 - drivers/s390/cio/qdio_main.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index de11ecc99c7c..9c9970a5dfb1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -262,7 +262,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 8941e7caaf4d..c7afdbded26b 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -641,21 +641,20 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, unsigned long phys_aob = 0; if (!q->use_cq) - goto out; + return 0; if (!q->aobs[bufnr]) { struct qaob *aob = qdio_allocate_aob(); q->aobs[bufnr] = aob; } if (q->aobs[bufnr]) { - q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE; q->sbal_state[bufnr].aob = q->aobs[bufnr]; q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user; phys_aob = virt_to_phys(q->aobs[bufnr]); WARN_ON_ONCE(phys_aob & 0xFF); } -out: + q->sbal_state[bufnr].flags = 0; return phys_aob; } -- GitLab From 2ac8fbd174d76e23e29faf6020911cb69c8a154a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 Jul 2018 16:14:18 +0200 Subject: [PATCH 1179/1291] s390/numa: move initial setup of node_to_cpumask_map commit fb7d7518b0d65955f91c7b875c36eae7694c69bd upstream. The numa_init_early initcall sets the node_to_cpumask_map[0] to the full cpu_possible_mask. Unfortunately this early_initcall is too late, the NUMA setup for numa=emu is done even earlier. The order of calls is numa_setup() -> emu_update_cpu_topology(), then the early_initcalls(), followed by sched_init_domains(). Starting with git commit 051f3ca02e46432c0965e8948f00c07d8a2f09c0 "sched/topology: Introduce NUMA identity node sched domain" the incorrect node_to_cpumask_map[0] really screws up the domain setup and the kernel panics with the follow oops: Cc: # v4.15+ Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/numa/numa.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 06a80434cfe6..5bd374491f94 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -134,26 +134,14 @@ void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); nodes_clear(node_possible_map); + /* Initially attach all possible CPUs to node 0. */ + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); if (mode->setup) mode->setup(); numa_setup_memory(); memblock_dump_all(); } -/* - * numa_init_early() - Initialization initcall - * - * This runs when only one CPU is online and before the first - * topology update is called for by the scheduler. - */ -static int __init numa_init_early(void) -{ - /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - return 0; -} -early_initcall(numa_init_early); - /* * numa_init_late() - Initialization initcall * -- GitLab From 0536c9e41f3f8299e67f9f4de3aa064351cb23af Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 13 Aug 2018 11:26:46 +0200 Subject: [PATCH 1180/1291] s390/pci: fix out of bounds access during irq setup commit 866f3576a72b2233a76dffb80290f8086dc49e17 upstream. During interrupt setup we allocate interrupt vectors, walk the list of msi descriptors, and fill in the message data. Requesting more interrupts than supported on s390 can lead to an out of bounds access. When we restrict the number of interrupts we should also stop walking the msi list after all supported interrupts are handled. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0fe649c0d542..960c4a362d8c 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -420,6 +420,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) hwirq = 0; for_each_pci_msi_entry(msi, pdev) { rc = -EIO; + if (hwirq >= msi_vecs) + break; irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ if (irq < 0) return -ENOMEM; -- GitLab From 6ba27d3e2b4acfa9c0d16132564393d646ea2a4d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:37:33 +0900 Subject: [PATCH 1181/1291] kprobes/arm: Fix %p uses in error messages commit 75b2f5f5911fe7a2fc82969b2b24dde34e8f820d upstream. Fix %p uses in error messages by removing it and using general dumper. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491905361.9916.15300852365956231645.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/core.c | 4 ++-- arch/arm/probes/kprobes/test-core.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 52d1cd14fda4..091e9a3c2dcb 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -291,8 +291,8 @@ void __kprobes kprobe_handler(struct pt_regs *regs) break; case KPROBE_REENTER: /* A nested probe was hit in FIQ, it is a BUG */ - pr_warn("Unrecoverable kprobe detected at %p.\n", - p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); /* fall through */ default: /* impossible cases */ diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 1c98a87786ca..a10d7187ad2c 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -1517,7 +1517,6 @@ static bool check_test_results(void) print_registers(&result_regs); if (mem) { - pr_err("current_stack=%p\n", current_stack); pr_err("expected_memory:\n"); print_memory(expected_memory, mem_size); pr_err("result_memory:\n"); -- GitLab From 4bdf9c17598014d0f10c7500edeefc25e0f501c7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:35:01 +0900 Subject: [PATCH 1182/1291] kprobes: Make list and blacklist root user read only commit f2a3ab36077222437b4826fc76111caa14562b7c upstream. Since the blacklist and list files on debugfs indicates a sensitive address information to reader, it should be restricted to the root user. Suggested-by: Thomas Richter Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491890171.9916.5183693615601334087.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index a66e838640ea..5c90765d37e7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2531,7 +2531,7 @@ static int __init debugfs_kprobe_init(void) if (!dir) return -ENOMEM; - file = debugfs_create_file("list", 0444, dir, NULL, + file = debugfs_create_file("list", 0400, dir, NULL, &debugfs_kprobes_operations); if (!file) goto error; @@ -2541,7 +2541,7 @@ static int __init debugfs_kprobe_init(void) if (!file) goto error; - file = debugfs_create_file("blacklist", 0444, dir, NULL, + file = debugfs_create_file("blacklist", 0400, dir, NULL, &debugfs_kprobe_blacklist_ops); if (!file) goto error; -- GitLab From 62c59b1ddbdc62ca9cf775ecc0d77ebb380105cf Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 15 May 2018 23:33:26 +0100 Subject: [PATCH 1183/1291] MIPS: Correct the 64-bit DSP accumulator register size commit f5958b4cf4fc38ed4583ab83fb7c4cd1ab05f47b upstream. Use the `unsigned long' rather than `__u32' type for DSP accumulator registers, like with the regular MIPS multiply/divide accumulator and general-purpose registers, as all are 64-bit in 64-bit implementations and using a 32-bit data type leads to contents truncation on context saving. Update `arch_ptrace' and `compat_arch_ptrace' accordingly, removing casts that are similarly not used with multiply/divide accumulator or general-purpose register accesses. Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") Patchwork: https://patchwork.linux-mips.org/patch/19329/ Cc: Alexander Viro Cc: James Hogan Cc: Ralf Baechle Cc: linux-fsdevel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 2.6.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/processor.h | 2 +- arch/mips/kernel/ptrace.c | 2 +- arch/mips/kernel/ptrace32.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 95b8c471f572..c847f47d9563 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -141,7 +141,7 @@ struct mips_fpu_struct { #define NUM_DSP_REGS 6 -typedef __u32 dspreg_t; +typedef unsigned long dspreg_t; struct mips_dsp_state { dspreg_t dspr[NUM_DSP_REGS]; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index e058cd300713..efffdf2464ab 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -847,7 +847,7 @@ long arch_ptrace(struct task_struct *child, long request, goto out; } dregs = __get_dsp_regs(child); - tmp = (unsigned long) (dregs[addr - DSP_BASE]); + tmp = dregs[addr - DSP_BASE]; break; } case DSP_CONTROL: diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 89026d33a07b..6990240785f6 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -141,7 +141,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, goto out; } dregs = __get_dsp_regs(child); - tmp = (unsigned long) (dregs[addr - DSP_BASE]); + tmp = dregs[addr - DSP_BASE]; break; } case DSP_CONTROL: -- GitLab From 156b5e33ab1214dd3301acb8c7e3ff67fa827532 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 18 Jun 2018 17:37:59 -0700 Subject: [PATCH 1184/1291] MIPS: Always use -march=, not - shortcuts commit 344ebf09949c31bcb8818d8458b65add29f1d67b upstream. The VDSO Makefile filters CFLAGS to select a subset which it uses whilst building the VDSO ELF. One of the flags it allows through is the -march= flag that selects the architecture/ISA to target. Unfortunately in cases where CONFIG_CPU_MIPS32_R{1,2}=y and the toolchain defaults to building for MIPS64, the main MIPS Makefile ends up using the short-form - flags in cflags-y. This is because the calls to cc-option always fail to use the long-form -march= flag due to the lack of an -mabi= flag in KBUILD_CFLAGS at the point where the cc-option function is executed. The resulting GCC invocation is something like: $ mips64-linux-gcc -Werror -march=mips32r2 -c -x c /dev/null -o tmp cc1: error: '-march=mips32r2' is not compatible with the selected ABI These short-form - flags are dropped by the VDSO Makefile's filtering, and so we attempt to build the VDSO without specifying any architecture. This results in an attempt to build the VDSO using whatever the compiler's default architecture is, regardless of whether that is suitable for the kernel configuration. One encountered build failure resulting from this mismatch is a rejection of the sync instruction if the kernel is configured for a MIPS32 or MIPS64 r1 or r2 target but the toolchain defaults to an older architecture revision such as MIPS1 which did not include the sync instruction: CC arch/mips/vdso/gettimeofday.o /tmp/ccGQKoOj.s: Assembler messages: /tmp/ccGQKoOj.s:273: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:329: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:520: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:714: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1009: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1066: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1114: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1279: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1334: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1374: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1459: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1514: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:1814: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:2002: Error: opcode not supported on this processor: mips1 (mips1) `sync' /tmp/ccGQKoOj.s:2066: Error: opcode not supported on this processor: mips1 (mips1) `sync' make[2]: *** [scripts/Makefile.build:318: arch/mips/vdso/gettimeofday.o] Error 1 make[1]: *** [scripts/Makefile.build:558: arch/mips/vdso] Error 2 make[1]: *** Waiting for unfinished jobs.... This can be reproduced for example by attempting to build pistachio_defconfig using Arnd's GCC 8.1.0 mips64 toolchain from kernel.org: https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/8.1.0/x86_64-gcc-8.1.0-nolibc-mips64-linux.tar.xz Resolve this problem by using the long-form -march= in all cases, which makes it through the arch/mips/vdso/Makefile's filtering & is thus consistently used to build both the kernel proper & the VDSO. The use of cc-option to prefer the long-form & fall back to the short-form flags makes no sense since the short-form is just an abbreviation for the also-supported long-form in all GCC versions that we support building with. This means there is no case in which we have to use the short-form - flags, so we can simply remove them. The manual redefinition of _MIPS_ISA is removed naturally along with the use of the short-form flags that it accompanied, and whilst here we remove the separate assembler ISA selection. I suspect that both of these were only required due to the mips32 vs mips2 mismatch that was introduced by commit 59b3e8e9aac6 ("[MIPS] Makefile crapectomy.") and fixed but not cleaned up by commit 9200c0b2a07c ("[MIPS] Fix Makefile bugs for MIPS32/MIPS64 R1 and R2."). I've marked this for backport as far as v4.4 where the MIPS VDSO was introduced. In earlier kernels there should be no ill effect to using the short-form flags. Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.4+ Reviewed-by: James Hogan Patchwork: https://patchwork.linux-mips.org/patch/19579/ Signed-off-by: Greg Kroah-Hartman --- arch/mips/Makefile | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index a96d97a806c9..5977884b008e 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -155,15 +155,11 @@ cflags-$(CONFIG_CPU_R4300) += -march=r4300 -Wa,--trap cflags-$(CONFIG_CPU_VR41XX) += -march=r4100 -Wa,--trap cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap cflags-$(CONFIG_CPU_TX49XX) += -march=r4600 -Wa,--trap -cflags-$(CONFIG_CPU_MIPS32_R1) += $(call cc-option,-march=mips32,-mips32 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \ - -Wa,-mips32 -Wa,--trap -cflags-$(CONFIG_CPU_MIPS32_R2) += $(call cc-option,-march=mips32r2,-mips32r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \ - -Wa,-mips32r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap cflags-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,--trap -modd-spreg -cflags-$(CONFIG_CPU_MIPS64_R1) += $(call cc-option,-march=mips64,-mips64 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \ - -Wa,-mips64 -Wa,--trap -cflags-$(CONFIG_CPU_MIPS64_R2) += $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \ - -Wa,-mips64r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap cflags-$(CONFIG_CPU_R5000) += -march=r5000 -Wa,--trap cflags-$(CONFIG_CPU_R5432) += $(call cc-option,-march=r5400,-march=r5000) \ -- GitLab From 1c40cd97ffe335101dfb738f7af138d49fb8b44f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 13 Jul 2018 15:37:57 +0800 Subject: [PATCH 1185/1291] MIPS: Change definition of cpu_relax() for Loongson-3 commit a30718868915fbb991a9ae9e45594b059f28e9ae upstream. Linux expects that if a CPU modifies a memory location, then that modification will eventually become visible to other CPUs in the system. Loongson 3 CPUs include a Store Fill Buffer (SFB) which sits between a core & its L1 data cache, queueing memory accesses & allowing for faster forwarding of data from pending stores to younger loads from the core. Unfortunately the SFB prioritizes loads such that a continuous stream of loads may cause a pending write to be buffered indefinitely. This is problematic if we end up with 2 CPUs which each perform a store that the other polls for - one or both CPUs may end up with their stores buffered in the SFB, never reaching cache due to the continuous reads from the poll loop. Such a deadlock condition has been observed whilst running qspinlock code. This patch changes the definition of cpu_relax() to smp_mb() for Loongson-3, forcing a flush of the SFB on SMP systems which will cause any pending writes to make it as far as the L1 caches where they will become visible to other CPUs. If the kernel is not compiled for SMP support, this will expand to a barrier() as before. This workaround matches that currently implemented for ARM when CONFIG_ARM_ERRATA_754327=y, which was introduced by commit 534be1d5a2da ("ARM: 6194/1: change definition of cpu_relax() for ARM11MPCore"). Although the workaround is only required when the Loongson 3 SFB functionality is enabled, and we only began explicitly enabling that functionality in v4.7 with commit 1e820da3c9af ("MIPS: Loongson-3: Introduce CONFIG_LOONGSON3_ENHANCEMENT"), existing or future firmware may enable the SFB which means we may need the workaround backported to earlier kernels too. [paul.burton@mips.com: - Reword commit message & comment. - Limit stable backport to v3.15+ where we support Loongson 3 CPUs.] Signed-off-by: Huacai Chen Signed-off-by: Paul Burton References: 534be1d5a2da ("ARM: 6194/1: change definition of cpu_relax() for ARM11MPCore") References: 1e820da3c9af ("MIPS: Loongson-3: Introduce CONFIG_LOONGSON3_ENHANCEMENT") Patchwork: https://patchwork.linux-mips.org/patch/19830/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/processor.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index c847f47d9563..eb1f6030ab85 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -388,7 +388,20 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29]) #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) +#ifdef CONFIG_CPU_LOONGSON3 +/* + * Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a + * tight read loop is executed, because reads take priority over writes & the + * hardware (incorrectly) doesn't ensure that writes will eventually occur. + * + * Since spin loops of any kind should have a cpu_relax() in them, force an SFB + * flush from cpu_relax() such that any pending writes will become visible as + * expected. + */ +#define cpu_relax() smp_mb() +#else #define cpu_relax() barrier() +#endif /* * Return_address is a replacement for __builtin_return_address(count) -- GitLab From ba0797a8016c303e3ab72276e60fb2858c5b4b1a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 21 Aug 2018 12:12:59 -0700 Subject: [PATCH 1186/1291] MIPS: lib: Provide MIPS64r6 __multi3() for GCC < 7 commit 690d9163bf4b8563a2682e619f938e6a0443947f upstream. Some versions of GCC suboptimally generate calls to the __multi3() intrinsic for MIPS64r6 builds, resulting in link failures due to the missing function: LD vmlinux.o MODPOST vmlinux.o kernel/bpf/verifier.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' fs/select.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' ... We already have a workaround for this in which we provide the instrinsic, but we do so selectively for GCC 7 only. Unfortunately the issue occurs with older GCC versions too - it has been observed with both GCC 5.4.0 & GCC 6.4.0. MIPSr6 support was introduced in GCC 5, so all major GCC versions prior to GCC 8 are affected and we extend our workaround accordingly to all MIPS64r6 builds using GCC versions older than GCC 8. Signed-off-by: Paul Burton Reported-by: Vladimir Kondratiev Fixes: ebabcf17bcd7 ("MIPS: Implement __multi3 for GCC7 MIPS64r6 builds") Patchwork: https://patchwork.linux-mips.org/patch/20297/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/lib/multi3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c index 111ad475aa0c..4c2483f410c2 100644 --- a/arch/mips/lib/multi3.c +++ b/arch/mips/lib/multi3.c @@ -4,12 +4,12 @@ #include "libgcc.h" /* - * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that - * specific case only we'll implement it here. + * GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for + * that specific case only we implement that intrinsic here. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 */ -#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8) /* multiply 64-bit values, low 64-bits returned */ static inline long long notrace dmulu(long long a, long long b) -- GitLab From 61ec14f42c844b7d011e0a939e522d577b2ed672 Mon Sep 17 00:00:00 2001 From: Ricardo Schwarzmeier Date: Tue, 26 Jun 2018 17:31:45 +0200 Subject: [PATCH 1187/1291] tpm: Return the actual size when receiving an unsupported command commit 36a11029b07ee30bdc4553274d0efea645ed9d91 upstream. The userpace expects to read the number of bytes stated in the header. Returning the size of the buffer instead would be unexpected. Cc: stable@vger.kernel.org Fixes: 095531f891e6 ("tpm: return a TPM_RC_COMMAND_CODE response if command is not implemented") Signed-off-by: Ricardo Schwarzmeier Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index dba5259def60..86b526b7d990 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -423,7 +423,7 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip, header->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); header->return_code = cpu_to_be32(TPM2_RC_COMMAND_CODE | TSS2_RESMGR_TPM_RC_LAYER); - return bufsiz; + return sizeof(*header); } if (bufsiz > TPM_BUFSIZE) -- GitLab From d071004e0249a2ecc5d856dd680835ad46fde062 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:41:58 -0700 Subject: [PATCH 1188/1291] scsi: mpt3sas: Fix _transport_smp_handler() error path commit 91b7bdb2c0089cbbb817df6888ab1458c645184e upstream. This patch avoids that smatch complains about a double unlock on ioc->transport_cmds.mutex. Fixes: 651a01364994 ("scsi: scsi_transport_sas: switch to bsg-lib for SMP passthrough") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_transport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index d3940c5d079d..63dd9bc21ff2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1936,12 +1936,12 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, pr_info(MPT3SAS_FMT "%s: host reset in progress!\n", __func__, ioc->name); rc = -EFAULT; - goto out; + goto job_done; } rc = mutex_lock_interruptible(&ioc->transport_cmds.mutex); if (rc) - goto out; + goto job_done; if (ioc->transport_cmds.status != MPT3_CMD_NOT_USED) { pr_err(MPT3SAS_FMT "%s: transport_cmds in use\n", ioc->name, @@ -2066,6 +2066,7 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, out: ioc->transport_cmds.status = MPT3_CMD_NOT_USED; mutex_unlock(&ioc->transport_cmds.mutex); +job_done: bsg_job_done(job, rc, reslen); } -- GitLab From c984f4d1d40a2f349503b3faf946502ccbf02f9f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:51:40 -0700 Subject: [PATCH 1189/1291] scsi: sysfs: Introduce sysfs_{un,}break_active_protection() commit 2afc9166f79b8f6da5f347f48515215ceee4ae37 upstream. Introduce these two functions and export them such that the next patch can add calls to these functions from the SCSI core. Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 44 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 14 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 39c75a86c67f..666986b95c5d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -407,6 +407,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, } EXPORT_SYMBOL_GPL(sysfs_chmod_file); +/** + * sysfs_break_active_protection - break "active" protection + * @kobj: The kernel object @attr is associated with. + * @attr: The attribute to break the "active" protection for. + * + * With sysfs, just like kernfs, deletion of an attribute is postponed until + * all active .show() and .store() callbacks have finished unless this function + * is called. Hence this function is useful in methods that implement self + * deletion. + */ +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + struct kernfs_node *kn; + + kobject_get(kobj); + kn = kernfs_find_and_get(kobj->sd, attr->name); + if (kn) + kernfs_break_active_protection(kn); + return kn; +} +EXPORT_SYMBOL_GPL(sysfs_break_active_protection); + +/** + * sysfs_unbreak_active_protection - restore "active" protection + * @kn: Pointer returned by sysfs_break_active_protection(). + * + * Undo the effects of sysfs_break_active_protection(). Since this function + * calls kernfs_put() on the kernfs node that corresponds to the 'attr' + * argument passed to sysfs_break_active_protection() that attribute may have + * been removed between the sysfs_break_active_protection() and + * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after + * this function has returned. + */ +void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ + struct kobject *kobj = kn->parent->priv; + + kernfs_unbreak_active_protection(kn); + kernfs_put(kn); + kobject_put(kobj); +} +EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); + /** * sysfs_remove_file_ns - remove an object attribute with a custom ns tag * @kobj: object we're acting for diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 40839c02d28c..cca19bb200bd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -239,6 +239,9 @@ int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr); +void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); @@ -352,6 +355,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj, return 0; } +static inline struct kernfs_node * +sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + return NULL; +} + +static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ +} + static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) -- GitLab From 5b55b24cec4ce55da8412cb9768e3d5165f72bd7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:51:41 -0700 Subject: [PATCH 1190/1291] scsi: core: Avoid that SCSI device removal through sysfs triggers a deadlock commit 0ee223b2e1f67cb2de9c0e3247c510d846e74d63 upstream. A long time ago the unfortunate decision was taken to add a self-deletion attribute to the sysfs SCSI device directory. That decision was unfortunate because self-deletion is really tricky. We can't drop that attribute because widely used user space software depends on it, namely the rescan-scsi-bus.sh script. Hence this patch that avoids that writing into that attribute triggers a deadlock. See also commit 7973cbd9fbd9 ("[PATCH] add sysfs attributes to scan and delete scsi_devices"). This patch avoids that self-removal triggers the following deadlock: ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc2-dbg+ #5 Not tainted ------------------------------------------------------ modprobe/6539 is trying to acquire lock: 000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90 but task is already holding lock: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&shost->scan_mutex){+.+.}: __mutex_lock+0xfe/0xc70 mutex_lock_nested+0x1b/0x20 scsi_remove_device+0x26/0x40 [scsi_mod] sdev_store_delete+0x27/0x30 [scsi_mod] dev_attr_store+0x3e/0x50 sysfs_kf_write+0x87/0xa0 kernfs_fop_write+0x190/0x230 __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (kn->count#202){++++}: lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&shost->scan_mutex); lock(kn->count#202); lock(&shost->scan_mutex); lock(kn->count#202); *** DEADLOCK *** 2 locks held by modprobe/6539: #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360 #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] stack backtrace: CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_circular_bug.isra.34+0x213/0x221 __lock_acquire+0x1a7e/0x1b50 lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html. Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()") Signed-off-by: Bart Van Assche Cc: Greg Kroah-Hartman Acked-by: Tejun Heo Cc: Johannes Thumshirn Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 40406c162d0d..8ce12ffcbb7a 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -721,8 +721,24 @@ static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (device_remove_file_self(dev, attr)) - scsi_remove_device(to_scsi_device(dev)); + struct kernfs_node *kn; + + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* + * Concurrent writes into the "delete" sysfs attribute may trigger + * concurrent calls to device_remove_file() and scsi_remove_device(). + * device_remove_file() handles concurrent removal calls by + * serializing these and by ignoring the second and later removal + * attempts. Concurrent calls of scsi_remove_device() are + * serialized. The second and later calls of scsi_remove_device() are + * ignored because the first call of that function changes the device + * state into SDEV_DEL. + */ + device_remove_file(dev, attr); + scsi_remove_device(to_scsi_device(dev)); + if (kn) + sysfs_unbreak_active_protection(kn); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); -- GitLab From ae302d685162f13ba3ab9e1403dc1bdbccb8cec5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 26 Jul 2018 12:13:49 -0500 Subject: [PATCH 1191/1291] iscsi target: fix session creation failure handling commit 26abc916a898d34c5ad159315a2f683def3c5555 upstream. The problem is that iscsi_login_zero_tsih_s1 sets conn->sess early in iscsi_login_set_conn_values. If the function fails later like when we alloc the idr it does kfree(sess) and leaves the conn->sess pointer set. iscsi_login_zero_tsih_s1 then returns -Exyz and we then call iscsi_target_login_sess_out and access the freed memory. This patch has iscsi_login_zero_tsih_s1 either completely setup the session or completely tear it down, so later in iscsi_target_login_sess_out we can just check for it being set to the connection. Cc: stable@vger.kernel.org Fixes: 0957627a9960 ("iscsi-target: Fix sess allocation leak in...") Signed-off-by: Mike Christie Acked-by: Martin K. Petersen Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_login.c | 35 ++++++++++++++--------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index dc13afbd4c88..98e27da34f3c 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -345,8 +345,7 @@ static int iscsi_login_zero_tsih_s1( pr_err("idr_alloc() for sess_idr failed\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess); - return -ENOMEM; + goto free_sess; } sess->creation_time = get_jiffies_64(); @@ -362,20 +361,28 @@ static int iscsi_login_zero_tsih_s1( ISCSI_LOGIN_STATUS_NO_RESOURCES); pr_err("Unable to allocate memory for" " struct iscsi_sess_ops.\n"); - kfree(sess); - return -ENOMEM; + goto remove_idr; } sess->se_sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(sess->se_sess)) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess->sess_ops); - kfree(sess); - return -ENOMEM; + goto free_ops; } return 0; + +free_ops: + kfree(sess->sess_ops); +remove_idr: + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, sess->session_index); + spin_unlock_bh(&sess_idr_lock); +free_sess: + kfree(sess); + conn->sess = NULL; + return -ENOMEM; } static int iscsi_login_zero_tsih_s2( @@ -1162,13 +1169,13 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn, ISCSI_LOGIN_STATUS_INIT_ERR); if (!zero_tsih || !conn->sess) goto old_sess_out; - if (conn->sess->se_sess) - transport_free_session(conn->sess->se_sess); - if (conn->sess->session_index != 0) { - spin_lock_bh(&sess_idr_lock); - idr_remove(&sess_idr, conn->sess->session_index); - spin_unlock_bh(&sess_idr_lock); - } + + transport_free_session(conn->sess->se_sess); + + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + kfree(conn->sess->sess_ops); kfree(conn->sess); conn->sess = NULL; -- GitLab From 2adc2541a5c4fc431350192a32198f5f90c2fe17 Mon Sep 17 00:00:00 2001 From: Alberto Panizzo Date: Fri, 6 Jul 2018 15:18:51 +0200 Subject: [PATCH 1192/1291] clk: rockchip: fix clk_i2sout parent selection bits on rk3399 commit a64ad008980c65d38e6cf6858429c78e6b740c41 upstream. Register, shift and mask were wrong according to datasheet. Fixes: 115510053e5e ("clk: rockchip: add clock controller for the RK3399") Cc: stable@vger.kernel.org Signed-off-by: Alberto Panizzo Signed-off-by: Anthony Brandon Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3399.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 6847120b61cd..62d0a69f8da0 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -630,7 +630,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { MUX(0, "clk_i2sout_src", mux_i2sch_p, CLK_SET_RATE_PARENT, RK3399_CLKSEL_CON(31), 0, 2, MFLAGS), COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "clk_i2sout", mux_i2sout_p, CLK_SET_RATE_PARENT, - RK3399_CLKSEL_CON(30), 8, 2, MFLAGS, + RK3399_CLKSEL_CON(31), 2, 1, MFLAGS, RK3399_CLKGATE_CON(8), 12, GFLAGS), /* uart */ -- GitLab From d2a97eba0c4ee31435a4174ffa3818e26d8ee9ac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Aug 2018 16:59:25 +0300 Subject: [PATCH 1193/1291] PM / clk: signedness bug in of_pm_clk_add_clks() commit 5e2e2f9f76e157063a656351728703cb02b068f1 upstream. "count" needs to be signed for the error handling to work. I made "i" signed as well so they match. Fixes: 02113ba93ea4 (PM / clk: Add support for obtaining clocks from device-tree) Cc: 4.6+ # 4.6+ Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/clock_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 8e2e4757adcb..5a42ae4078c2 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -185,7 +185,7 @@ EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); int of_pm_clk_add_clks(struct device *dev) { struct clk **clks; - unsigned int i, count; + int i, count; int ret; if (!dev || !dev->of_node) -- GitLab From 54cecb7440bc280d54ce3e843818f2f9c3b1f98c Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 26 Jun 2018 15:28:29 +0200 Subject: [PATCH 1194/1291] power: generic-adc-battery: fix out-of-bounds write when copying channel properties commit 932d47448c3caa0fa99e84d7f5bc302aa286efd8 upstream. We did have sporadic problems in the pinctrl framework during boot where a pin group name unexpectedly became NULL leading to a NULL dereference in strcmp. Detailled analysis of the failing cases did reveal that there were two devm allocated objects close to each other. The second one was the affected group_desc in pinmux and the first one was the psy_desc->properties buffer of the gab driver. Review of the gab code showed that the address calculation for one memcpy() is wrong. It does properties + sizeof(type) * index but C is defined to do the index multiplication already for pointer + integer additions. Hence the factor was applied twice and the memcpy() does write outside of the properties buffer. Sometimes it happened to be the pinctrl and triggered the strcmp(NULL). Anyways, it is overkill to use a memcpy() here instead of a simple assignment, which is easier to read and has less risk for wrong address calculations. So we change code to a simple assignment. If we initialize the index to the first free location, we can even remove the local variable 'properties'. This bug seems to exist right from the beginning in 3.7-rc1 in commit e60fea794e6e ("power: battery: Generic battery driver using IIO") Signed-off-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org Fixes: e60fea794e6e ("power: battery: Generic battery driver using IIO") Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/generic-adc-battery.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/power/supply/generic-adc-battery.c b/drivers/power/supply/generic-adc-battery.c index 37e523374fe0..9d3dbb5b8e98 100644 --- a/drivers/power/supply/generic-adc-battery.c +++ b/drivers/power/supply/generic-adc-battery.c @@ -243,10 +243,9 @@ static int gab_probe(struct platform_device *pdev) struct power_supply_desc *psy_desc; struct power_supply_config psy_cfg = {}; struct gab_platform_data *pdata = pdev->dev.platform_data; - enum power_supply_property *properties; int ret = 0; int chan; - int index = 0; + int index = ARRAY_SIZE(gab_props); adc_bat = devm_kzalloc(&pdev->dev, sizeof(*adc_bat), GFP_KERNEL); if (!adc_bat) { @@ -280,8 +279,6 @@ static int gab_probe(struct platform_device *pdev) } memcpy(psy_desc->properties, gab_props, sizeof(gab_props)); - properties = (enum power_supply_property *) - ((char *)psy_desc->properties + sizeof(gab_props)); /* * getting channel from iio and copying the battery properties @@ -295,15 +292,12 @@ static int gab_probe(struct platform_device *pdev) adc_bat->channel[chan] = NULL; } else { /* copying properties for supported channels only */ - memcpy(properties + sizeof(*(psy_desc->properties)) * index, - &gab_dyn_props[chan], - sizeof(gab_dyn_props[chan])); - index++; + psy_desc->properties[index++] = gab_dyn_props[chan]; } } /* none of the channels are supported so let's bail out */ - if (index == 0) { + if (index == ARRAY_SIZE(gab_props)) { ret = -ENODEV; goto second_mem_fail; } @@ -314,7 +308,7 @@ static int gab_probe(struct platform_device *pdev) * as come channels may be not be supported by the device.So * we need to take care of that. */ - psy_desc->num_properties = ARRAY_SIZE(gab_props) + index; + psy_desc->num_properties = index; adc_bat->psy = power_supply_register(&pdev->dev, psy_desc, &psy_cfg); if (IS_ERR(adc_bat->psy)) { -- GitLab From f9f67667e0aeeafa65b4542c3d67173871baedfd Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 26 Jun 2018 15:28:30 +0200 Subject: [PATCH 1195/1291] power: generic-adc-battery: check for duplicate properties copied from iio channels commit a427503edaaed9b75ed9746a654cece7e93e60a8 upstream. If an iio channel defines a basic property, there are duplicate entries in /sys/class/power/*/uevent. So add a check to avoid duplicates. Since all channels may be duplicates, we have to modify the related error check. Signed-off-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org Fixes: e60fea794e6e ("power: battery: Generic battery driver using IIO") Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/generic-adc-battery.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/generic-adc-battery.c b/drivers/power/supply/generic-adc-battery.c index 9d3dbb5b8e98..371b5ec70087 100644 --- a/drivers/power/supply/generic-adc-battery.c +++ b/drivers/power/supply/generic-adc-battery.c @@ -246,6 +246,7 @@ static int gab_probe(struct platform_device *pdev) int ret = 0; int chan; int index = ARRAY_SIZE(gab_props); + bool any = false; adc_bat = devm_kzalloc(&pdev->dev, sizeof(*adc_bat), GFP_KERNEL); if (!adc_bat) { @@ -292,12 +293,22 @@ static int gab_probe(struct platform_device *pdev) adc_bat->channel[chan] = NULL; } else { /* copying properties for supported channels only */ - psy_desc->properties[index++] = gab_dyn_props[chan]; + int index2; + + for (index2 = 0; index2 < index; index2++) { + if (psy_desc->properties[index2] == + gab_dyn_props[chan]) + break; /* already known */ + } + if (index2 == index) /* really new */ + psy_desc->properties[index++] = + gab_dyn_props[chan]; + any = true; } } /* none of the channels are supported so let's bail out */ - if (index == ARRAY_SIZE(gab_props)) { + if (!any) { ret = -ENODEV; goto second_mem_fail; } -- GitLab From 63a0f9de021aac2491be8466e44f35b07e3127b1 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 21 Aug 2018 17:25:07 +0200 Subject: [PATCH 1196/1291] watchdog: Mark watchdog touch functions as notrace commit cb9d7fd51d9fbb329d182423bd7b92d0f8cb0e01 upstream. Some architectures need to use stop_machine() to patch functions for ftrace, and the assumption is that the stopped CPUs do not make function calls to traceable functions when they are in the stopped state. Commit ce4f06dcbb5d ("stop_machine: Touch_nmi_watchdog() after MULTI_STOP_PREPARE") added calls to the watchdog touch functions from the stopped CPUs and those functions lack notrace annotations. This leads to crashes when enabling/disabling ftrace on ARM kernels built with the Thumb-2 instruction set. Fix it by adding the necessary notrace annotations. Fixes: ce4f06dcbb5d ("stop_machine: Touch_nmi_watchdog() after MULTI_STOP_PREPARE") Signed-off-by: Vincent Whitchurch Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: oleg@redhat.com Cc: tj@kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180821152507.18313-1-vincent.whitchurch@axis.com Signed-off-by: Greg Kroah-Hartman --- kernel/watchdog.c | 4 ++-- kernel/watchdog_hld.c | 2 +- kernel/workqueue.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c8e06703e44c..087994b23f8b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -265,7 +265,7 @@ static void __touch_watchdog(void) * entering idle state. This should only be used for scheduler events. * Use touch_softlockup_watchdog() for everything else. */ -void touch_softlockup_watchdog_sched(void) +notrace void touch_softlockup_watchdog_sched(void) { /* * Preemption can be enabled. It doesn't matter which CPU's timestamp @@ -274,7 +274,7 @@ void touch_softlockup_watchdog_sched(void) raw_cpu_write(watchdog_touch_ts, 0); } -void touch_softlockup_watchdog(void) +notrace void touch_softlockup_watchdog(void) { touch_softlockup_watchdog_sched(); wq_watchdog_touch(raw_smp_processor_id()); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index e449a23e9d59..4ece6028007a 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -29,7 +29,7 @@ static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; static atomic_t watchdog_cpus = ATOMIC_INIT(0); -void arch_touch_nmi_watchdog(void) +notrace void arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d8a7f8939c81..08bc551976b2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5484,7 +5484,7 @@ static void wq_watchdog_timer_fn(unsigned long data) mod_timer(&wq_watchdog_timer, jiffies + thresh); } -void wq_watchdog_touch(int cpu) +notrace void wq_watchdog_touch(int cpu) { if (cpu >= 0) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; -- GitLab From 73b2e7073b51de0b03ebd15c97dd3ad0c3470810 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 26 Apr 2018 11:51:08 -0600 Subject: [PATCH 1197/1291] cdrom: Fix info leak/OOB read in cdrom_ioctl_drive_status commit 8f3fafc9c2f0ece10832c25f7ffcb07c97a32ad4 upstream. Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()" There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). Signed-off-by: Scott Bauer Signed-off-by: Scott Bauer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index bfc566d3f31a..8cfa10ab7abc 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2542,7 +2542,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || (arg == CDSL_CURRENT || arg == CDSL_NONE)) return cdi->ops->drive_status(cdi, CDSL_CURRENT); - if (((int)arg >= cdi->capacity)) + if (arg >= cdi->capacity) return -EINVAL; return cdrom_slot_status(cdi, arg); } -- GitLab From 616d41d1b4087a85ec1091f6a7f4460911083f44 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Sun, 4 Feb 2018 12:01:43 -0500 Subject: [PATCH 1198/1291] gcc-plugins: Add include required by GCC release 8 commit 80d172431696482d9acd8d2c4ea78fed8956e2a1 upstream. GCC requires another #include to get the gcc-plugins to build cleanly. Signed-off-by: Valdis Kletnieks Signed-off-by: Kees Cook Cc: Lance Albertson Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/gcc-common.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index ffd1dfaa1cc1..f46750053377 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -97,6 +97,10 @@ #include "predict.h" #include "ipa-utils.h" +#if BUILDING_GCC_VERSION >= 8000 +#include "stringpool.h" +#endif + #if BUILDING_GCC_VERSION >= 4009 #include "attribs.h" #include "varasm.h" -- GitLab From 77d1658e5dd111dc01972d57ce1984654cca1131 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Feb 2018 17:27:46 -0800 Subject: [PATCH 1199/1291] gcc-plugins: Use dynamic initializers commit b86729109c5fd0a480300f40608aac68764b5adf upstream. GCC 8 changed the order of some fields and is very picky about ordering in static initializers, so instead just move to dynamic initializers, and drop the redundant already-zero field assignments. Suggested-by: Valdis Kletnieks Signed-off-by: Kees Cook Cc: Lance Albertson Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/latent_entropy_plugin.c | 17 ++--- scripts/gcc-plugins/randomize_layout_plugin.c | 75 ++++++------------- scripts/gcc-plugins/structleak_plugin.c | 19 ++--- 3 files changed, 33 insertions(+), 78 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 65264960910d..cbe1d6c4b1a5 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -255,21 +255,14 @@ static tree handle_latent_entropy_attribute(tree *node, tree name, return NULL_TREE; } -static struct attribute_spec latent_entropy_attr = { - .name = "latent_entropy", - .min_length = 0, - .max_length = 0, - .decl_required = true, - .type_required = false, - .function_type_required = false, - .handler = handle_latent_entropy_attribute, -#if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = false -#endif -}; +static struct attribute_spec latent_entropy_attr = { }; static void register_attributes(void *event_data __unused, void *data __unused) { + latent_entropy_attr.name = "latent_entropy"; + latent_entropy_attr.decl_required = true; + latent_entropy_attr.handler = handle_latent_entropy_attribute; + register_attribute(&latent_entropy_attr); } diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 0073af326449..c4a345c3715b 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -580,68 +580,35 @@ static void finish_type(void *event_data, void *data) return; } -static struct attribute_spec randomize_layout_attr = { - .name = "randomize_layout", - // related to args - .min_length = 0, - .max_length = 0, - .decl_required = false, - // need type declaration - .type_required = true, - .function_type_required = false, - .handler = handle_randomize_layout_attr, -#if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = true -#endif -}; +static struct attribute_spec randomize_layout_attr = { }; +static struct attribute_spec no_randomize_layout_attr = { }; +static struct attribute_spec randomize_considered_attr = { }; +static struct attribute_spec randomize_performed_attr = { }; -static struct attribute_spec no_randomize_layout_attr = { - .name = "no_randomize_layout", - // related to args - .min_length = 0, - .max_length = 0, - .decl_required = false, - // need type declaration - .type_required = true, - .function_type_required = false, - .handler = handle_randomize_layout_attr, +static void register_attributes(void *event_data, void *data) +{ + randomize_layout_attr.name = "randomize_layout"; + randomize_layout_attr.type_required = true; + randomize_layout_attr.handler = handle_randomize_layout_attr; #if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = true + randomize_layout_attr.affects_type_identity = true; #endif -}; -static struct attribute_spec randomize_considered_attr = { - .name = "randomize_considered", - // related to args - .min_length = 0, - .max_length = 0, - .decl_required = false, - // need type declaration - .type_required = true, - .function_type_required = false, - .handler = handle_randomize_considered_attr, + no_randomize_layout_attr.name = "no_randomize_layout"; + no_randomize_layout_attr.type_required = true; + no_randomize_layout_attr.handler = handle_randomize_layout_attr; #if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = false + no_randomize_layout_attr.affects_type_identity = true; #endif -}; -static struct attribute_spec randomize_performed_attr = { - .name = "randomize_performed", - // related to args - .min_length = 0, - .max_length = 0, - .decl_required = false, - // need type declaration - .type_required = true, - .function_type_required = false, - .handler = handle_randomize_performed_attr, -#if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = false -#endif -}; + randomize_considered_attr.name = "randomize_considered"; + randomize_considered_attr.type_required = true; + randomize_considered_attr.handler = handle_randomize_considered_attr; + + randomize_performed_attr.name = "randomize_performed"; + randomize_performed_attr.type_required = true; + randomize_performed_attr.handler = handle_randomize_performed_attr; -static void register_attributes(void *event_data, void *data) -{ register_attribute(&randomize_layout_attr); register_attribute(&no_randomize_layout_attr); register_attribute(&randomize_considered_attr); diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c index 3f8dd4868178..10292f791e99 100644 --- a/scripts/gcc-plugins/structleak_plugin.c +++ b/scripts/gcc-plugins/structleak_plugin.c @@ -57,21 +57,16 @@ static tree handle_user_attribute(tree *node, tree name, tree args, int flags, b return NULL_TREE; } -static struct attribute_spec user_attr = { - .name = "user", - .min_length = 0, - .max_length = 0, - .decl_required = false, - .type_required = false, - .function_type_required = false, - .handler = handle_user_attribute, -#if BUILDING_GCC_VERSION >= 4007 - .affects_type_identity = true -#endif -}; +static struct attribute_spec user_attr = { }; static void register_attributes(void *event_data, void *data) { + user_attr.name = "user"; + user_attr.handler = handle_user_attribute; +#if BUILDING_GCC_VERSION >= 4007 + user_attr.affects_type_identity = true; +#endif + register_attribute(&user_attr); } -- GitLab From ee13f7edca5838436feefde90ed1b2ebb07c4184 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Sep 2018 09:26:42 +0200 Subject: [PATCH 1200/1291] Linux 4.14.68 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 93faeeece331..3da579058926 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 67 +SUBLEVEL = 68 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 54c0fa829d45f9ad40e40a47d7a899d47dc23569 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 14 Jul 2018 12:52:10 -0400 Subject: [PATCH 1201/1291] net: 6lowpan: fix reserved space for single frames commit ac74f87c789af40936a80131c4759f3e72579c3a upstream. This patch fixes patch add handling to take care tail and headroom for single 6lowpan frames. We need to be sure we have a skb with the right head and tailroom for single frames. This patch do it by using skb_copy_expand() if head and tailroom is not enough allocated by upper layer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195059 Reported-by: David Palma Reported-by: Rabi Narayan Sahoo Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/tx.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index e6ff5128e61a..ca53efa17be1 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -265,9 +265,24 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) /* We must take a copy of the skb before we modify/replace the ipv6 * header as the header could be used elsewhere */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; + if (unlikely(skb_headroom(skb) < ldev->needed_headroom || + skb_tailroom(skb) < ldev->needed_tailroom)) { + struct sk_buff *nskb; + + nskb = skb_copy_expand(skb, ldev->needed_headroom, + ldev->needed_tailroom, GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + kfree_skb(skb); + return NET_XMIT_DROP; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + } ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset); if (ret < 0) { -- GitLab From 4a2262972b5dab51e1e09dee8992d29b4946c1ab Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Jul 2018 16:32:03 -0400 Subject: [PATCH 1202/1291] net: mac802154: tx: expand tailroom if necessary commit f9c52831133050c6b82aa8b6831c92da2bbf2a0b upstream. This patch is necessary if case of AF_PACKET or other socket interface which I am aware of it and didn't allocated the necessary room. Reported-by: David Palma Reported-by: Rabi Narayan Sahoo Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/mac802154/tx.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 7e253455f9dd..bcd1a5e6ebf4 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -63,8 +63,21 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) int ret; if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) { - u16 crc = crc_ccitt(0, skb->data, skb->len); + struct sk_buff *nskb; + u16 crc; + + if (unlikely(skb_tailroom(skb) < IEEE802154_FCS_LEN)) { + nskb = skb_copy_expand(skb, 0, IEEE802154_FCS_LEN, + GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + goto err_tx; + } + } + crc = crc_ccitt(0, skb->data, skb->len); put_unaligned_le16(crc, skb_put(skb, 2)); } -- GitLab From 4d3016e56c3d2fdbd5b1a984c4316da3cfbf63a9 Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Mon, 16 Jul 2018 17:35:29 -0700 Subject: [PATCH 1203/1291] 9p/net: Fix zero-copy path in the 9p virtio transport commit d28c756caee6e414d9ba367d0b92da24145af2a8 upstream. The zero-copy optimization when reading or writing large chunks of data is quite useful. However, the 9p messages created through the zero-copy write path have an incorrect message size: it should be the size of the header + size of the data being written but instead it's just the size of the header. This only works if the server ignores the size field of the message and otherwise breaks the framing of the protocol. Fix this by re-writing the message size field with the correct value. Tested by running `dd if=/dev/zero of=out bs=4k count=1` inside a virtio-9p mount. Link: http://lkml.kernel.org/r/20180717003529.114368-1-chirantan@chromium.org Signed-off-by: Chirantan Ekbote Reviewed-by: Greg Kurz Tested-by: Greg Kurz Cc: Dylan Reid Cc: Guenter Roeck Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3aa5a93ad107..202c8ef1c0fa 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -406,6 +406,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, p9_debug(P9_DEBUG_TRANS, "virtio request\n"); if (uodata) { + __le32 sz; int n = p9_get_mapped_pages(chan, &out_pages, uodata, outlen, &offs, &need_drop); if (n < 0) @@ -416,6 +417,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4); outlen = n; } + /* The size field of the message must include the length of the + * header and the length of the data. We didn't actually know + * the length of the data until this point so add it in now. + */ + sz = cpu_to_le32(req->tc->size + outlen); + memcpy(&req->tc->sdata[0], &sz, sizeof(sz)); } else if (uidata) { int n = p9_get_mapped_pages(chan, &in_pages, uidata, inlen, &offs, &need_drop); -- GitLab From aa1d05c504832dbb17855dd4bd858b1cb263ca7b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 10 Aug 2018 11:13:52 +0200 Subject: [PATCH 1204/1291] spi: davinci: fix a NULL pointer dereference commit 563a53f3906a6b43692498e5b3ae891fac93a4af upstream. On non-OF systems spi->controlled_data may be NULL. This causes a NULL pointer derefence on dm365-evm. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index 6ddb6ef1fda4..c5bbe08771a4 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -217,7 +217,7 @@ static void davinci_spi_chipselect(struct spi_device *spi, int value) pdata = &dspi->pdata; /* program delay transfers if tx_delay is non zero */ - if (spicfg->wdelay) + if (spicfg && spicfg->wdelay) spidat1 |= SPIDAT1_WDEL; /* -- GitLab From 23554cab1ebdd2bb1c1cde5bbb3d3064a85bb131 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 28 Jun 2018 13:52:23 +0300 Subject: [PATCH 1205/1291] spi: pxa2xx: Add support for Intel Ice Lake commit 22d71a5097ec7059b6cbbee678a4f88484695941 upstream. Intel Ice Lake SPI host controller follows the Intel Cannon Lake but the PCI IDs are different. Add the new PCI IDs to the driver supported devices list. Signed-off-by: Mika Westerberg Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 4cb515a3104c..3a2e46e49405 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1480,6 +1480,10 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = { { PCI_VDEVICE(INTEL, 0x31c2), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x31c4), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x31c6), LPSS_BXT_SSP }, + /* ICL-LP */ + { PCI_VDEVICE(INTEL, 0x34aa), LPSS_CNL_SSP }, + { PCI_VDEVICE(INTEL, 0x34ab), LPSS_CNL_SSP }, + { PCI_VDEVICE(INTEL, 0x34fb), LPSS_CNL_SSP }, /* APL */ { PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP }, -- GitLab From dc9a7dd57f63e56dcba88565c57cde7b55efb329 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 29 Jun 2018 13:33:09 +0200 Subject: [PATCH 1206/1291] spi: spi-fsl-dspi: Fix imprecise abort on VF500 during probe commit d8ffee2f551a627ffb7b216e2da322cb9a037f77 upstream. Registers of DSPI should not be accessed before enabling its clock. On Toradex Colibri VF50 on Iris carrier board this could be seen during bootup as imprecise abort: Unhandled fault: imprecise external abort (0x1c06) at 0x00000000 Internal error: : 1c06 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.39-dirty #97 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) Backtrace: [<804166a8>] (regmap_write) from [<80466b5c>] (dspi_probe+0x1f0/0x8dc) [<8046696c>] (dspi_probe) from [<8040107c>] (platform_drv_probe+0x54/0xb8) [<80401028>] (platform_drv_probe) from [<803ff53c>] (driver_probe_device+0x280/0x2f8) [<803ff2bc>] (driver_probe_device) from [<803ff674>] (__driver_attach+0xc0/0xc4) [<803ff5b4>] (__driver_attach) from [<803fd818>] (bus_for_each_dev+0x70/0xa4) [<803fd7a8>] (bus_for_each_dev) from [<803fee74>] (driver_attach+0x24/0x28) [<803fee50>] (driver_attach) from [<803fe980>] (bus_add_driver+0x1a0/0x218) [<803fe7e0>] (bus_add_driver) from [<803fffe8>] (driver_register+0x80/0x100) [<803fff68>] (driver_register) from [<80400fdc>] (__platform_driver_register+0x48/0x50) [<80400f94>] (__platform_driver_register) from [<8091cf7c>] (fsl_dspi_driver_init+0x1c/0x20) [<8091cf60>] (fsl_dspi_driver_init) from [<8010195c>] (do_one_initcall+0x4c/0x174) [<80101910>] (do_one_initcall) from [<80900e8c>] (kernel_init_freeable+0x144/0x1d8) [<80900d48>] (kernel_init_freeable) from [<805ff6a8>] (kernel_init+0x10/0x114) [<805ff698>] (kernel_init) from [<80107be8>] (ret_from_fork+0x14/0x2c) Cc: Fixes: 5ee67b587a2b ("spi: dspi: clear SPI_SR before enable interrupt") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-fsl-dspi.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index d89127f4a46d..ca013dd4ff6b 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1006,30 +1006,30 @@ static int dspi_probe(struct platform_device *pdev) goto out_master_put; } + dspi->clk = devm_clk_get(&pdev->dev, "dspi"); + if (IS_ERR(dspi->clk)) { + ret = PTR_ERR(dspi->clk); + dev_err(&pdev->dev, "unable to get clock\n"); + goto out_master_put; + } + ret = clk_prepare_enable(dspi->clk); + if (ret) + goto out_master_put; + dspi_init(dspi); dspi->irq = platform_get_irq(pdev, 0); if (dspi->irq < 0) { dev_err(&pdev->dev, "can't get platform irq\n"); ret = dspi->irq; - goto out_master_put; + goto out_clk_put; } ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt, 0, pdev->name, dspi); if (ret < 0) { dev_err(&pdev->dev, "Unable to attach DSPI interrupt\n"); - goto out_master_put; - } - - dspi->clk = devm_clk_get(&pdev->dev, "dspi"); - if (IS_ERR(dspi->clk)) { - ret = PTR_ERR(dspi->clk); - dev_err(&pdev->dev, "unable to get clock\n"); - goto out_master_put; + goto out_clk_put; } - ret = clk_prepare_enable(dspi->clk); - if (ret) - goto out_master_put; if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) { ret = dspi_request_dma(dspi, res->start); -- GitLab From e7b6b3699b97a1e6b1ad31cd7c611dd92685c399 Mon Sep 17 00:00:00 2001 From: Janek Kotas Date: Mon, 4 Jun 2018 11:24:44 +0000 Subject: [PATCH 1207/1291] spi: cadence: Change usleep_range() to udelay(), for atomic context commit 931c4e9a72ae91d59c5332ffb6812911a749da8e upstream. The path "spi: cadence: Add usleep_range() for cdns_spi_fill_tx_fifo()" added a usleep_range() function call, which cannot be used in atomic context. However the cdns_spi_fill_tx_fifo() function can be called during an interrupt which may result in a kernel panic: BUG: scheduling while atomic: grep/561/0x00010002 Modules linked in: Preemption disabled at: [] wait_for_common+0x48/0x178 CPU: 0 PID: 561 Comm: grep Not tainted 4.17.0 #1 Hardware name: Cadence CSP (DT) Call trace: dump_backtrace+0x0/0x198 show_stack+0x14/0x20 dump_stack+0x8c/0xac __schedule_bug+0x6c/0xb8 __schedule+0x570/0x5d8 schedule+0x34/0x98 schedule_hrtimeout_range_clock+0x98/0x110 schedule_hrtimeout_range+0x10/0x18 usleep_range+0x64/0x98 cdns_spi_fill_tx_fifo+0x70/0xb0 cdns_spi_irq+0xd0/0xe0 __handle_irq_event_percpu+0x9c/0x128 handle_irq_event_percpu+0x34/0x88 handle_irq_event+0x48/0x78 handle_fasteoi_irq+0xbc/0x1b0 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x84/0xf8 gic_handle_irq+0xc4/0x180 This patch replaces the function call with udelay() which can be used in an atomic context, like an interrupt. Signed-off-by: Jan Kotas Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-cadence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 4a001634023e..02bd1eba045b 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -319,7 +319,7 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi) */ if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL) - usleep_range(10, 20); + udelay(10); if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); -- GitLab From 8513c01ae15f5ec456dda387e44317752c8da544 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Aug 2018 21:28:01 +0300 Subject: [PATCH 1208/1291] mmc: renesas_sdhi_internal_dmac: fix #define RST_RESERVED_BITS commit 9faf870e559a710c44e747ba20383ea82d8ac5d2 upstream. The DM_CM_RST register actually has bits 0-31 defaulting to 1s and bits 32-63 defaulting to 0s -- fix off-by-one in #define RST_RESERVED_BITS. Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Fixes: 2a68ea7896e3 ("mmc: renesas-sdhi: add support for R-Car Gen3 SDHI DMAC") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 8bae88a150fd..713658be6661 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -44,7 +44,7 @@ /* DM_CM_RST */ #define RST_DTRANRST1 BIT(9) #define RST_DTRANRST0 BIT(8) -#define RST_RESERVED_BITS GENMASK_ULL(32, 0) +#define RST_RESERVED_BITS GENMASK_ULL(31, 0) /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 -- GitLab From cf12d0f9c0dc9129f08490390d14ee4d9dbf6ebb Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Fri, 27 Jul 2018 09:09:53 -0600 Subject: [PATCH 1209/1291] readahead: stricter check for bdi io_pages commit dc30b96ab6d569060741572cf30517d3179429a8 upstream. ondemand_readahead() checks bdi->io_pages to cap the maximum pages that need to be processed. This works until the readit section. If we would do an async only readahead (async size = sync size) and target is at beginning of window we expand the pages by another get_next_ra_size() pages. Btrace for large reads shows that kernel always issues a doubled size read at the beginning of processing. Add an additional check for io_pages in the lower part of the func. The fix helps devices that hard limit bio pages and rely on proper handling of max_hw_read_sectors (e.g. older FusionIO cards). For that reason it could qualify for stable. Fixes: 9491ae4a ("mm: don't cap request size based on read-ahead setting") Cc: stable@vger.kernel.org Signed-off-by: Markus Stockhausen stockhausen@collogia.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- mm/readahead.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index c4ca70239233..59aa0d06f254 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -380,6 +380,7 @@ ondemand_readahead(struct address_space *mapping, { struct backing_dev_info *bdi = inode_to_bdi(mapping->host); unsigned long max_pages = ra->ra_pages; + unsigned long add_pages; pgoff_t prev_offset; /* @@ -469,10 +470,17 @@ ondemand_readahead(struct address_space *mapping, * Will this read hit the readahead marker made by itself? * If so, trigger the readahead marker hit now, and merge * the resulted next readahead window into the current one. + * Take care of maximum IO pages as above. */ if (offset == ra->start && ra->size == ra->async_size) { - ra->async_size = get_next_ra_size(ra, max_pages); - ra->size += ra->async_size; + add_pages = get_next_ra_size(ra, max_pages); + if (ra->size + add_pages <= max_pages) { + ra->async_size = add_pages; + ra->size += add_pages; + } else { + ra->size = max_pages; + ra->async_size = max_pages >> 1; + } } return ra_submit(ra, mapping, filp); -- GitLab From 0affbaece6d0b7c75c5166732d0481ae9a28be60 Mon Sep 17 00:00:00 2001 From: xiao jin Date: Mon, 30 Jul 2018 14:11:12 +0800 Subject: [PATCH 1210/1291] block: blk_init_allocated_queue() set q->fq as NULL in the fail case commit 54648cf1ec2d7f4b6a71767799c45676a138ca24 upstream. We find the memory use-after-free issue in __blk_drain_queue() on the kernel 4.14. After read the latest kernel 4.18-rc6 we think it has the same problem. Memory is allocated for q->fq in the blk_init_allocated_queue(). If the elevator init function called with error return, it will run into the fail case to free the q->fq. Then the __blk_drain_queue() uses the same memory after the free of the q->fq, it will lead to the unpredictable event. The patch is to set q->fq as NULL in the fail case of blk_init_allocated_queue(). Fixes: commit 7c94e1c157a2 ("block: introduce blk_flush_queue to drive flush machinery") Cc: Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: xiao jin Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-core.c b/block/blk-core.c index 68bae6338ad4..c9134166dde6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1025,6 +1025,7 @@ int blk_init_allocated_queue(struct request_queue *q) q->exit_rq_fn(q, q->fq->flush_rq); out_free_flush_queue: blk_free_flush_queue(q->fq); + q->fq = NULL; return -ENOMEM; } EXPORT_SYMBOL(blk_init_allocated_queue); -- GitLab From 1e2698976822852b67c2dc11b2ebc2febc25955c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 30 Jul 2018 20:02:19 +0800 Subject: [PATCH 1211/1291] block: really disable runtime-pm for blk-mq commit b233f127042dba991229e3882c6217c80492f6ef upstream. Runtime PM isn't ready for blk-mq yet, and commit 765e40b675a9 ("block: disable runtime-pm for blk-mq") tried to disable it. Unfortunately, it can't take effect in that way since user space still can switch it on via 'echo auto > /sys/block/sdN/device/power/control'. This patch disables runtime-pm for blk-mq really by pm_runtime_disable() and fixes all kinds of PM related kernel crash. Cc: Tomas Janousek Cc: Przemek Socha Cc: Alan Stern Cc: Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Patrick Steinhardt Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c9134166dde6..1d27e2a152e0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3459,9 +3459,11 @@ EXPORT_SYMBOL(blk_finish_plug); */ void blk_pm_runtime_init(struct request_queue *q, struct device *dev) { - /* not support for RQF_PM and ->rpm_status in blk-mq yet */ - if (q->mq_ops) + /* Don't enable runtime PM for blk-mq until it is ready */ + if (q->mq_ops) { + pm_runtime_disable(dev); return; + } q->dev = dev; q->rpm_status = RPM_ACTIVE; -- GitLab From 7188f7416438a34412a6702f548f81be9e2cf56c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 2 May 2018 20:50:21 +0100 Subject: [PATCH 1212/1291] drm/i915/userptr: reject zero user_size commit c11c7bfd213495784b22ef82a69b6489f8d0092f upstream. Operating on a zero sized GEM userptr object will lead to explosions. Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") Testcase: igt/gem_userptr_blits/input-checking Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180502195021.30900-1-matthew.auld@intel.com Cc: Loic Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 709efe2357ea..05ae8c4a8a1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -782,6 +782,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; -- GitLab From fba6b7f4bbe24c45c62172d8f1a450cbe4ec8dc6 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 27 Jun 2018 20:58:45 +0200 Subject: [PATCH 1213/1291] libertas: fix suspend and resume for SDIO connected cards commit 7444a8092906ed44c09459780c56ba57043e39b1 upstream. Prior to commit 573185cc7e64 ("mmc: core: Invoke sdio func driver's PM callbacks from the sdio bus"), the MMC core used to call into the power management functions of SDIO clients itself and removed the card if the return code was non-zero. IOW, the mmc handled errors gracefully and didn't upchain them to the pm core. Since this change, the mmc core relies on generic power management functions which treat all errors as a reason to cancel the suspend immediately. This causes suspend attempts to fail when the libertas driver is loaded. To fix this, power down the card explicitly in if_sdio_suspend() when we know we're about to lose power and return success. Also set a flag in these cases, and power up the card again in if_sdio_resume(). Fixes: 573185cc7e64 ("mmc: core: Invoke sdio func driver's PM callbacks from the sdio bus") Cc: Signed-off-by: Daniel Mack Reviewed-by: Chris Ball Reviewed-by: Ulf Hansson Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/dev.h | 1 + .../net/wireless/marvell/libertas/if_sdio.c | 30 +++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/dev.h b/drivers/net/wireless/marvell/libertas/dev.h index dd1ee1f0af48..469134930026 100644 --- a/drivers/net/wireless/marvell/libertas/dev.h +++ b/drivers/net/wireless/marvell/libertas/dev.h @@ -104,6 +104,7 @@ struct lbs_private { u8 fw_ready; u8 surpriseremoved; u8 setup_fw_on_resume; + u8 power_up_on_resume; int (*hw_host_to_card) (struct lbs_private *priv, u8 type, u8 *payload, u16 nb); void (*reset_card) (struct lbs_private *priv); int (*power_save) (struct lbs_private *priv); diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 2300e796c6ab..43743c26c071 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1290,15 +1290,23 @@ static void if_sdio_remove(struct sdio_func *func) static int if_sdio_suspend(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); - int ret; struct if_sdio_card *card = sdio_get_drvdata(func); + struct lbs_private *priv = card->priv; + int ret; mmc_pm_flag_t flags = sdio_get_host_pm_caps(func); + priv->power_up_on_resume = false; /* If we're powered off anyway, just let the mmc layer remove the * card. */ - if (!lbs_iface_active(card->priv)) - return -ENOSYS; + if (!lbs_iface_active(priv)) { + if (priv->fw_ready) { + priv->power_up_on_resume = true; + if_sdio_power_off(card); + } + + return 0; + } dev_info(dev, "%s: suspend: PM flags = 0x%x\n", sdio_func_id(func), flags); @@ -1306,9 +1314,14 @@ static int if_sdio_suspend(struct device *dev) /* If we aren't being asked to wake on anything, we should bail out * and let the SD stack power down the card. */ - if (card->priv->wol_criteria == EHS_REMOVE_WAKEUP) { + if (priv->wol_criteria == EHS_REMOVE_WAKEUP) { dev_info(dev, "Suspend without wake params -- powering down card\n"); - return -ENOSYS; + if (priv->fw_ready) { + priv->power_up_on_resume = true; + if_sdio_power_off(card); + } + + return 0; } if (!(flags & MMC_PM_KEEP_POWER)) { @@ -1321,7 +1334,7 @@ static int if_sdio_suspend(struct device *dev) if (ret) return ret; - ret = lbs_suspend(card->priv); + ret = lbs_suspend(priv); if (ret) return ret; @@ -1336,6 +1349,11 @@ static int if_sdio_resume(struct device *dev) dev_info(dev, "%s: resume: we're back\n", sdio_func_id(func)); + if (card->priv->power_up_on_resume) { + if_sdio_power_on(card); + wait_event(card->pwron_waitq, card->priv->fw_ready); + } + ret = lbs_resume(card->priv); return ret; -- GitLab From c160382ab064ae53c4a939aa6a8989bf553f77f7 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 10 Jun 2018 16:43:02 -0400 Subject: [PATCH 1214/1291] media: Revert "[media] tvp5150: fix pad format frame height" commit 1831af092308aa5a59ae61e47494e441c8be6b93 upstream. This reverts commit 0866df8dffd514185bfab0d205db76e4c02cf1e4. The v4l uAPI documentation [0] makes clear that in the case of interlaced video (i.e: field is V4L2_FIELD_ALTERNATE) the height refers to the number of lines in the field and not the number of lines in the full frame (which is twice the field height for interlaced formats). So the original height calculation was correct, and it shouldn't had been changed by the mentioned commit. [0]:https://linuxtv.org/downloads/v4l-dvb-apis/uapi/v4l/subdev-formats.html Fixes: 0866df8dffd5 ("[media] tvp5150: fix pad format frame height") Signed-off-by: Javier Martinez Canillas Cc: # for v4.12 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 698fa764999c..59b0c1fce9be 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -871,7 +871,7 @@ static int tvp5150_fill_fmt(struct v4l2_subdev *sd, f = &format->format; f->width = decoder->rect.width; - f->height = decoder->rect.height; + f->height = decoder->rect.height / 2; f->code = MEDIA_BUS_FMT_UYVY8_2X8; f->field = V4L2_FIELD_ALTERNATE; -- GitLab From 9cbb32610233e3531ec74e17b44d702c1ca353f0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Jul 2018 12:11:39 -0500 Subject: [PATCH 1215/1291] mailbox: xgene-slimpro: Fix potential NULL pointer dereference commit 3512a18cbd8d09e22a790540cb9624c3c49827ba upstream. There is a potential execution path in which function platform_get_resource() returns NULL. If this happens, we will end up having a NULL pointer dereference. Fix this by replacing devm_ioremap with devm_ioremap_resource, which has the NULL check and the memory region request. This code was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Fixes: f700e84f417b ("mailbox: Add support for APM X-Gene platform mailbox driver") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jassi Brar Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox-xgene-slimpro.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c index a7040163dd43..b8b2b3533f46 100644 --- a/drivers/mailbox/mailbox-xgene-slimpro.c +++ b/drivers/mailbox/mailbox-xgene-slimpro.c @@ -195,9 +195,9 @@ static int slimpro_mbox_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ctx); regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mb_base = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); - if (!mb_base) - return -ENOMEM; + mb_base = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(mb_base)) + return PTR_ERR(mb_base); /* Setup mailbox links */ for (i = 0; i < MBOX_CNT; i++) { -- GitLab From f70805bef73eda436ad664deab818b3291cc8407 Mon Sep 17 00:00:00 2001 From: Yannik Sembritzki Date: Thu, 16 Aug 2018 14:05:10 +0100 Subject: [PATCH 1216/1291] Replace magic for trusting the secondary keyring with #define commit 817aef260037f33ee0f44c17fe341323d3aebd6d upstream. Replace the use of a magic number that indicates that verify_*_signature() should use the secondary keyring with a symbol. Signed-off-by: Yannik Sembritzki Signed-off-by: David Howells Cc: keyrings@vger.kernel.org Cc: linux-security-module@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- certs/system_keyring.c | 3 ++- crypto/asymmetric_keys/pkcs7_key_type.c | 2 +- include/linux/verification.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 6251d1b27f0c..81728717523d 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,7 @@ int verify_pkcs7_signature(const void *data, size_t len, if (!trusted_keys) { trusted_keys = builtin_trusted_keys; - } else if (trusted_keys == (void *)1UL) { + } else if (trusted_keys == VERIFY_USE_SECONDARY_KEYRING) { #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING trusted_keys = secondary_trusted_keys; #else diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c index 1063b644efcd..b2aa925a84bc 100644 --- a/crypto/asymmetric_keys/pkcs7_key_type.c +++ b/crypto/asymmetric_keys/pkcs7_key_type.c @@ -62,7 +62,7 @@ static int pkcs7_preparse(struct key_preparsed_payload *prep) return verify_pkcs7_signature(NULL, 0, prep->data, prep->datalen, - (void *)1UL, usage, + VERIFY_USE_SECONDARY_KEYRING, usage, pkcs7_view_content, prep); } diff --git a/include/linux/verification.h b/include/linux/verification.h index a10549a6c7cd..cfa4730d607a 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -12,6 +12,12 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +/* + * Indicate that both builtin trusted keys and secondary trusted keys + * should be used. + */ +#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) + /* * The use to which an asymmetric key is being put. */ -- GitLab From 259c5122f3a80036a2e9774260a5bfecbeac7f76 Mon Sep 17 00:00:00 2001 From: Yannik Sembritzki Date: Thu, 16 Aug 2018 14:05:23 +0100 Subject: [PATCH 1217/1291] Fix kexec forbidding kernels signed with keys in the secondary keyring to boot commit ea93102f32244e3f45c8b26260be77ed0cc1d16c upstream. The split of .system_keyring into .builtin_trusted_keys and .secondary_trusted_keys broke kexec, thereby preventing kernels signed by keys which are now in the secondary keyring from being kexec'd. Fix this by passing VERIFY_USE_SECONDARY_KEYRING to verify_pefile_signature(). Fixes: d3bfe84129f6 ("certs: Add a secondary system keyring that can be added to dynamically") Signed-off-by: Yannik Sembritzki Signed-off-by: David Howells Cc: kexec@lists.infradead.org Cc: keyrings@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kexec-bzimage64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index f24cd9f1799a..928b0c6083c9 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -532,7 +532,7 @@ static int bzImage64_cleanup(void *loader_data) static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { return verify_pefile_signature(kernel, kernel_len, - NULL, + VERIFY_USE_SECONDARY_KEYRING, VERIFYING_KEXEC_PE_SIGNATURE); } #endif -- GitLab From ed53c0ecc959c0f3941324b03231c07e1e4afa64 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 7 Aug 2018 02:12:45 +0530 Subject: [PATCH 1218/1291] powerpc/fadump: handle crash memory ranges array index overflow commit 1bd6a1c4b80a28d975287630644e6b47d0f977a5 upstream. Crash memory ranges is an array of memory ranges of the crashing kernel to be exported as a dump via /proc/vmcore file. The size of the array is set based on INIT_MEMBLOCK_REGIONS, which works alright in most cases where memblock memory regions count is less than INIT_MEMBLOCK_REGIONS value. But this count can grow beyond INIT_MEMBLOCK_REGIONS value since commit 142b45a72e22 ("memblock: Add array resizing support"). On large memory systems with a few DLPAR operations, the memblock memory regions count could be larger than INIT_MEMBLOCK_REGIONS value. On such systems, registering fadump results in crash or other system failures like below: task: c00007f39a290010 ti: c00000000b738000 task.ti: c00000000b738000 NIP: c000000000047df4 LR: c0000000000f9e58 CTR: c00000000010f180 REGS: c00000000b73b570 TRAP: 0300 Tainted: G L X (4.4.140+) MSR: 8000000000009033 CR: 22004484 XER: 20000000 CFAR: c000000000008500 DAR: 000007a450000000 DSISR: 40000000 SOFTE: 0 ... NIP [c000000000047df4] smp_send_reschedule+0x24/0x80 LR [c0000000000f9e58] resched_curr+0x138/0x160 Call Trace: resched_curr+0x138/0x160 (unreliable) check_preempt_curr+0xc8/0xf0 ttwu_do_wakeup+0x38/0x150 try_to_wake_up+0x224/0x4d0 __wake_up_common+0x94/0x100 ep_poll_callback+0xac/0x1c0 __wake_up_common+0x94/0x100 __wake_up_sync_key+0x70/0xa0 sock_def_readable+0x58/0xa0 unix_stream_sendmsg+0x2dc/0x4c0 sock_sendmsg+0x68/0xa0 ___sys_sendmsg+0x2cc/0x2e0 __sys_sendmsg+0x5c/0xc0 SyS_socketcall+0x36c/0x3f0 system_call+0x3c/0x100 as array index overflow is not checked for while setting up crash memory ranges causing memory corruption. To resolve this issue, dynamically allocate memory for crash memory ranges and resize it incrementally, in units of pagesize, on hitting array size limit. Fixes: 2df173d9e85d ("fadump: Initialize elfcore header and add PT_LOAD program headers.") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar [mpe: Just use PAGE_SIZE directly, fixup variable placement] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/fadump.h | 3 - arch/powerpc/kernel/fadump.c | 91 ++++++++++++++++++++++++++----- 2 files changed, 77 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 5a23010af600..1e7a33592e29 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -195,9 +195,6 @@ struct fadump_crash_info_header { struct cpumask online_mask; }; -/* Crash memory ranges */ -#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) - struct fad_crash_memory_ranges { unsigned long long base; unsigned long long size; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d0020bc1f209..5a6470383ca3 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -47,8 +47,10 @@ static struct fadump_mem_struct fdm; static const struct fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); -struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +struct fad_crash_memory_ranges *crash_memory_ranges; +int crash_memory_ranges_size; int crash_mem_ranges; +int max_crash_mem_ranges; /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -843,38 +845,88 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return 0; } -static inline void fadump_add_crash_memory(unsigned long long base, - unsigned long long end) +static void free_crash_memory_ranges(void) +{ + kfree(crash_memory_ranges); + crash_memory_ranges = NULL; + crash_memory_ranges_size = 0; + max_crash_mem_ranges = 0; +} + +/* + * Allocate or reallocate crash memory ranges array in incremental units + * of PAGE_SIZE. + */ +static int allocate_crash_memory_ranges(void) +{ + struct fad_crash_memory_ranges *new_array; + u64 new_size; + + new_size = crash_memory_ranges_size + PAGE_SIZE; + pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", + new_size); + + new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); + if (new_array == NULL) { + pr_err("Insufficient memory for setting up crash memory ranges\n"); + free_crash_memory_ranges(); + return -ENOMEM; + } + + crash_memory_ranges = new_array; + crash_memory_ranges_size = new_size; + max_crash_mem_ranges = (new_size / + sizeof(struct fad_crash_memory_ranges)); + return 0; +} + +static inline int fadump_add_crash_memory(unsigned long long base, + unsigned long long end) { if (base == end) - return; + return 0; + + if (crash_mem_ranges == max_crash_mem_ranges) { + int ret; + + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", crash_mem_ranges, base, end - 1, (end - base)); crash_memory_ranges[crash_mem_ranges].base = base; crash_memory_ranges[crash_mem_ranges].size = end - base; crash_mem_ranges++; + return 0; } -static void fadump_exclude_reserved_area(unsigned long long start, +static int fadump_exclude_reserved_area(unsigned long long start, unsigned long long end) { unsigned long long ra_start, ra_end; + int ret = 0; ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; if ((ra_start < end) && (ra_end > start)) { if ((start < ra_start) && (end > ra_end)) { - fadump_add_crash_memory(start, ra_start); - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(start, ra_start); + if (ret) + return ret; + + ret = fadump_add_crash_memory(ra_end, end); } else if (start < ra_start) { - fadump_add_crash_memory(start, ra_start); + ret = fadump_add_crash_memory(start, ra_start); } else if (ra_end < end) { - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(ra_end, end); } } else - fadump_add_crash_memory(start, end); + ret = fadump_add_crash_memory(start, end); + + return ret; } static int fadump_init_elfcore_header(char *bufp) @@ -914,10 +966,11 @@ static int fadump_init_elfcore_header(char *bufp) * Traverse through memblock structure and setup crash memory ranges. These * ranges will be used create PT_LOAD program headers in elfcore header. */ -static void fadump_setup_crash_memory_ranges(void) +static int fadump_setup_crash_memory_ranges(void) { struct memblock_region *reg; unsigned long long start, end; + int ret; pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; @@ -928,7 +981,9 @@ static void fadump_setup_crash_memory_ranges(void) * specified during fadump registration. We need to create a separate * program header for this chunk with the correct offset. */ - fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + if (ret) + return ret; for_each_memblock(memory, reg) { start = (unsigned long long)reg->base; @@ -948,8 +1003,12 @@ static void fadump_setup_crash_memory_ranges(void) } /* add this range excluding the reserved dump area. */ - fadump_exclude_reserved_area(start, end); + ret = fadump_exclude_reserved_area(start, end); + if (ret) + return ret; } + + return 0; } /* @@ -1072,6 +1131,7 @@ static int register_fadump(void) { unsigned long addr; void *vaddr; + int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1080,7 +1140,9 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - fadump_setup_crash_memory_ranges(); + ret = fadump_setup_crash_memory_ranges(); + if (ret) + return ret; addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); /* Initialize fadump crash info header. */ @@ -1158,6 +1220,7 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fadump_unregister_dump(&fdm); + free_crash_memory_ranges(); } } -- GitLab From 4bfd910e8fb3b86660243ac0cf1b31df842b54b2 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 7 Aug 2018 19:46:46 +0530 Subject: [PATCH 1219/1291] powerpc/pseries: Fix endianness while restoring of r3 in MCE handler. commit cd813e1cd7122f2c261dce5b54d1e0c97f80e1a5 upstream. During Machine Check interrupt on pseries platform, register r3 points RTAS extended event log passed by hypervisor. Since hypervisor uses r3 to pass pointer to rtas log, it stores the original r3 value at the start of the memory (first 8 bytes) pointed by r3. Since hypervisor stores this info and rtas log is in BE format, linux should make sure to restore r3 value in correct endian format. Without this patch when MCE handler, after recovery, returns to code that that caused the MCE may end up with Data SLB access interrupt for invalid address followed by kernel panic or hang. Severe Machine check interrupt [Recovered] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel] Initiator: CPU Error type: SLB [Multihit] Effective address: d00000000ca70000 cpu 0xa: Vector: 380 (Data SLB Access) at [c0000000fc7775b0] pc: c0000000009694c0: vsnprintf+0x80/0x480 lr: c0000000009698e0: vscnprintf+0x20/0x60 sp: c0000000fc777830 msr: 8000000002009033 dar: a803a30c000000d0 current = 0xc00000000bc9ef00 paca = 0xc00000001eca5c00 softe: 3 irq_happened: 0x01 pid = 8860, comm = insmod vscnprintf+0x20/0x60 vprintk_emit+0xb4/0x4b0 vprintk_func+0x5c/0xd0 printk+0x38/0x4c init_module+0x1c0/0x338 [bork_kernel] do_one_initcall+0x54/0x230 do_init_module+0x8c/0x248 load_module+0x12b8/0x15b0 sys_finit_module+0xa8/0x110 system_call+0x58/0x6c --- Exception: c00 (System Call) at 00007fff8bda0644 SP (7fffdfbfe980) is in userspace This patch fixes this issue. Fixes: a08a53ea4c97 ("powerpc/le: Enable RTAS events support") Cc: stable@vger.kernel.org # v3.15+ Reviewed-by: Nicholas Piggin Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5e1ef9150182..2edc673be137 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -360,7 +360,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) } savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; -- GitLab From 15677df25afc6e4d27731c9c47888807588f7ffc Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Thu, 18 Jan 2018 12:55:24 -0600 Subject: [PATCH 1220/1291] PCI: Add wrappers for dev_printk() commit 7506dc7989933235e6fc23f3d0516bdbf0f7d1a8 upstream. Add PCI-specific dev_printk() wrappers and use them to simplify the code slightly. No functional change intended. Signed-off-by: Frederick Lawler [bhelgaas: squash into one patch] Signed-off-by: Bjorn Helgaas [only take the pci.h portion of this patch, to make backporting stuff easier over time - gregkh] Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index 9d6fae809c09..b1abbcc614cf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2292,4 +2292,16 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) /* provide the legacy pci_dma_* API */ #include +#define pci_printk(level, pdev, fmt, arg...) \ + dev_printk(level, &(pdev)->dev, fmt, ##arg) + +#define pci_emerg(pdev, fmt, arg...) dev_emerg(&(pdev)->dev, fmt, ##arg) +#define pci_alert(pdev, fmt, arg...) dev_alert(&(pdev)->dev, fmt, ##arg) +#define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) +#define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg) +#define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg) +#define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg) +#define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg) +#define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg) + #endif /* LINUX_PCI_H */ -- GitLab From 1eb08e7b192d2c412175f607cf51449c916abd57 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 17 Aug 2018 17:30:39 +1000 Subject: [PATCH 1221/1291] powerpc/powernv/pci: Work around races in PCI bridge enabling commit db2173198b9513f7add8009f225afa1f1c79bcc6 upstream. The generic code is racy when multiple children of a PCI bridge try to enable it simultaneously. This leads to drivers trying to access a device through a not-yet-enabled bridge, and this EEH errors under various circumstances when using parallel driver probing. There is work going on to fix that properly in the PCI core but it will take some time. x86 gets away with it because (outside of hotplug), the BIOS enables all the bridges at boot time. This patch does the same thing on powernv by enabling all bridges that have child devices at boot time, thus avoiding subsequent races. It's suitable for backporting to stable and distros, while the proper PCI fix will probably be significantly more invasive. Signed-off-by: Benjamin Herrenschmidt Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 677b29ef4532..e919696c7137 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3286,12 +3286,49 @@ static void pnv_pci_ioda_create_dbgfs(void) #endif /* CONFIG_DEBUG_FS */ } +static void pnv_pci_enable_bridge(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + struct pci_bus *child; + + /* Empty bus ? bail */ + if (list_empty(&bus->devices)) + return; + + /* + * If there's a bridge associated with that bus enable it. This works + * around races in the generic code if the enabling is done during + * parallel probing. This can be removed once those races have been + * fixed. + */ + if (dev) { + int rc = pci_enable_device(dev); + if (rc) + pci_err(dev, "Error enabling bridge (%d)\n", rc); + pci_set_master(dev); + } + + /* Perform the same to child busses */ + list_for_each_entry(child, &bus->children, node) + pnv_pci_enable_bridge(child); +} + +static void pnv_pci_enable_bridges(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) + pnv_pci_enable_bridge(hose->bus); +} + static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); pnv_pci_ioda_setup_iommu_api(); pnv_pci_ioda_create_dbgfs(); + pnv_pci_enable_bridges(); + #ifdef CONFIG_EEH eeh_init(); eeh_addr_cache_build(); -- GitLab From 241e62005c742a238c525d0dbb80e1c8cb735466 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 4 Jul 2018 20:58:33 +0530 Subject: [PATCH 1222/1291] cxl: Fix wrong comparison in cxl_adapter_context_get() commit ef6cb5f1a048fdf91ccee6d63d2bfa293338502d upstream. Function atomic_inc_unless_negative() returns a bool to indicate success/failure. However cxl_adapter_context_get() wrongly compares the return value against '>=0' which will always be true. The patch fixes this comparison to '==0' there by also fixing this compile time warning: drivers/misc/cxl/main.c:290 cxl_adapter_context_get() warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists") Cc: stable@vger.kernel.org # v4.9+ Reported-by: Dan Carpenter Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index c1ba0d42cbc8..e0f29b8a872d 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -287,7 +287,7 @@ int cxl_adapter_context_get(struct cxl *adapter) int rc; rc = atomic_inc_unless_negative(&adapter->contexts_num); - return rc >= 0 ? 0 : -EBUSY; + return rc ? 0 : -EBUSY; } void cxl_adapter_context_put(struct cxl *adapter) -- GitLab From bac5c3c122f43114f925fb53f2e5edd86413e423 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jul 2018 14:08:18 -0700 Subject: [PATCH 1223/1291] ib_srpt: Fix a use-after-free in srpt_close_ch() commit 995250959d22fc341b5424e3343b0ce5df672461 upstream. Avoid that KASAN reports the following: BUG: KASAN: use-after-free in srpt_close_ch+0x4f/0x1b0 [ib_srpt] Read of size 4 at addr ffff880151180cb8 by task check/4681 CPU: 15 PID: 4681 Comm: check Not tainted 4.18.0-rc2-dbg+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 srpt_close_ch+0x4f/0x1b0 [ib_srpt] srpt_set_enabled+0xf7/0x1e0 [ib_srpt] srpt_tpg_enable_store+0xb8/0x120 [ib_srpt] configfs_write_file+0x14e/0x1d0 [configfs] __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 97c2225829ea..60105ba77889 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1713,8 +1713,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch) int ret; if (!srpt_set_ch_state(ch, CH_DRAINING)) { - pr_debug("%s-%d: already closed\n", ch->sess_name, - ch->qp->qp_num); + pr_debug("%s: already closed\n", ch->sess_name); return false; } -- GitLab From def89b81ef459e98d3e4adce8de78561c3be14f0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 26 Jun 2018 08:39:36 -0700 Subject: [PATCH 1224/1291] RDMA/rxe: Set wqe->status correctly if an unexpected response is received commit 61b717d041b1976530f68f8b539b2e3a7dd8e39c upstream. Every function that returns COMPST_ERROR must set wqe->status to another value than IB_WC_SUCCESS before returning COMPST_ERROR. Fix the only code path for which this is not yet the case. Signed-off-by: Bart Van Assche Cc: Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 9eb12c2e3c74..83cfe44f070e 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -276,6 +276,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } reset_retry_counters(qp); -- GitLab From 390671089d6214542fb8bd8e2f7700647c18017f Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 27 Jul 2018 13:05:58 +0200 Subject: [PATCH 1225/1291] 9p: fix multiple NULL-pointer-dereferences commit 10aa14527f458e9867cf3d2cc6b8cb0f6704448b upstream. Added checks to prevent GPFs from raising. Link: http://lkml.kernel.org/r/20180727110558.5479-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+1a262da37d3bead15c39@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 5 ++++- net/9p/trans_rdma.c | 3 +++ net/9p/trans_virtio.c | 3 +++ net/9p/trans_xen.c | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 985046ae4231..0102bfd57fe4 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -951,7 +951,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) if (err < 0) return err; - if (valid_ipaddr4(addr) < 0) + if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; @@ -1001,6 +1001,9 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; + if (addr == NULL) + return -EINVAL; + if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 6d8e3031978f..f58467a49090 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -646,6 +646,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; + if (addr == NULL) + return -EINVAL; + /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 202c8ef1c0fa..a03336ede2ca 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -650,6 +650,9 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) int ret = -ENOENT; int found = 0; + if (devname == NULL) + return -EINVAL; + mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { if (!strncmp(devname, chan->tag, chan->tag_len) && diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 325c56043007..c10bdf63eae7 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -95,6 +95,9 @@ static int p9_xen_create(struct p9_client *client, const char *addr, char *args) { struct xen_9pfs_front_priv *priv; + if (addr == NULL) + return -EINVAL; + read_lock(&xen_9pfs_lock); list_for_each_entry(priv, &xen_9pfs_devs, list) { if (!strcmp(priv->tag, addr)) { -- GitLab From 4827a583871a225cfce62a52d779e2a31f310925 Mon Sep 17 00:00:00 2001 From: piaojun Date: Wed, 25 Jul 2018 11:13:16 +0800 Subject: [PATCH 1226/1291] fs/9p/xattr.c: catch the error of p9_client_clunk when setting xattr failed commit 3111784bee81591ea2815011688d28b65df03627 upstream. In my testing, v9fs_fid_xattr_set will return successfully even if the backend ext4 filesystem has no space to store xattr key-value. That will cause inconsistent behavior between front end and back end. The reason is that lsetxattr will be triggered by p9_client_clunk, and unfortunately we did not catch the error. This patch will catch the error to notify upper caller. p9_client_clunk (in 9p) p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); v9fs_clunk (in qemu) put_fid free_fid v9fs_xattr_fid_clunk v9fs_co_lsetxattr s->ops->lsetxattr ext4_xattr_user_set (in host ext4 filesystem) Link: http://lkml.kernel.org/r/5B57EACC.2060900@huawei.com Signed-off-by: Jun Piao Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index f329eee6dc93..352abc39e891 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -105,7 +105,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, { struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len}; struct iov_iter from; - int retval; + int retval, err; iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); @@ -126,7 +126,9 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, retval); else p9_client_write(fid, 0, &from, &retval); - p9_client_clunk(fid); + err = p9_client_clunk(fid); + if (!retval && err) + retval = err; return retval; } -- GitLab From f92953b0765bf465261782d47af6a4b38c20fa8c Mon Sep 17 00:00:00 2001 From: jiangyiwen Date: Fri, 3 Aug 2018 12:11:34 +0800 Subject: [PATCH 1227/1291] 9p/virtio: fix off-by-one error in sg list bounds check commit 23cba9cbde0bba05d772b335fe5f66aa82b9ad19 upstream. Because the value of limit is VIRTQUEUE_NUM, if index is equal to limit, it will cause sg array out of bounds, so correct the judgement of BUG_ON. Link: http://lkml.kernel.org/r/5B63D5F6.6080109@huawei.com Signed-off-by: Yiwen Jiang Reported-By: Dan Carpenter Acked-by: Jun Piao Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a03336ede2ca..da0d3b257459 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -189,7 +189,7 @@ static int pack_sg_list(struct scatterlist *sg, int start, s = rest_of_page(data); if (s > count) s = count; - BUG_ON(index > limit); + BUG_ON(index >= limit); /* Make sure we don't terminate early. */ sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); @@ -234,6 +234,7 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, s = PAGE_SIZE - data_off; if (s > count) s = count; + BUG_ON(index >= limit); /* Make sure we don't terminate early. */ sg_unmark_end(&sg[index]); sg_set_page(&sg[index++], pdata[i++], s, data_off); -- GitLab From 312479e068288b4a15b869b049e4f8bfacd6e36a Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Tue, 10 Jul 2018 00:29:43 +0200 Subject: [PATCH 1228/1291] net/9p/client.c: version pointer uninitialized commit 7913690dcc5e18e235769fd87c34143072f5dbea upstream. The p9_client_version() does not initialize the version pointer. If the call to p9pdu_readf() returns an error and version has not been allocated in p9pdu_readf(), then the program will jump to the "error" label and will try to free the version pointer. If version is not initialized, free() will be called with uninitialized, garbage data and will provoke a crash. Link: http://lkml.kernel.org/r/20180709222943.19503-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+65c6b72f284a39d416b4@syzkaller.appspotmail.com Reviewed-by: Jun Piao Reviewed-by: Yiwen Jiang Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index b433aff5ff13..3ec5a82929b2 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -955,7 +955,7 @@ static int p9_client_version(struct p9_client *c) { int err = 0; struct p9_req_t *req; - char *version; + char *version = NULL; int msize; p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", -- GitLab From 5f04d296f24b8a89c48eba775260d772834f9a6c Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 20 Jul 2018 11:27:30 +0200 Subject: [PATCH 1229/1291] net/9p/trans_fd.c: fix race-condition by flushing workqueue before the kfree() commit 430ac66eb4c5b5c4eb846b78ebf65747510b30f1 upstream. The patch adds the flush in p9_mux_poll_stop() as it the function used by p9_conn_destroy(), in turn called by p9_fd_close() to stop the async polling associated with the data regarding the connection. Link: http://lkml.kernel.org/r/20180720092730.27104-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+39749ed7d9ef6dfb23f6@syzkaller.appspotmail.com To: Eric Van Hensbergen To: Ron Minnich To: Latchesar Ionkov Cc: Yiwen Jiang Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 0102bfd57fe4..38e21a1e97bc 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -185,6 +185,8 @@ static void p9_mux_poll_stop(struct p9_conn *m) spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); + + flush_work(&p9_poll_work); } /** -- GitLab From 4f4b1c5c4c8a566ba5debc9e1a613de5e540c9ae Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jul 2018 20:13:25 +0200 Subject: [PATCH 1230/1291] dm integrity: change 'suspending' variable from bool to int commit c21b16392701543d61e366dca84e15fe7f0cf0cf upstream. Early alpha processors can't write a byte or short atomically - they read 8 bytes, modify the byte or two bytes in registers and write back 8 bytes. The modification of the variable "suspending" may race with modification of the variable "failed". Fix this by changing "suspending" to an int. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index cbc56372ff97..898286ed47a1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -177,7 +177,7 @@ struct dm_integrity_c { __u8 sectors_per_block; unsigned char mode; - bool suspending; + int suspending; int failed; @@ -2209,7 +2209,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) del_timer_sync(&ic->autocommit_timer); - ic->suspending = true; + WRITE_ONCE(ic->suspending, 1); queue_work(ic->commit_wq, &ic->commit_work); drain_workqueue(ic->commit_wq); @@ -2219,7 +2219,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) dm_integrity_flush_buffers(ic); } - ic->suspending = false; + WRITE_ONCE(ic->suspending, 0); BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); -- GitLab From 3bef88257145fd4f8abf0c0f92ecdeafbd96739e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 2 Aug 2018 16:18:24 +0800 Subject: [PATCH 1231/1291] dm thin: stop no_space_timeout worker when switching to write-mode commit 75294442d896f2767be34f75aca7cc2b0d01301f upstream. Now both check_for_space() and do_no_space_timeout() will read & write pool->pf.error_if_no_space. If these functions run concurrently, as shown in the following case, the default setting of "queue_if_no_space" can get lost. precondition: * error_if_no_space = false (aka "queue_if_no_space") * pool is in Out-of-Data-Space (OODS) mode * no_space_timeout worker has been queued CPU 0: CPU 1: // delete a thin device process_delete_mesg() // check_for_space() invoked by commit() set_pool_mode(pool, PM_WRITE) pool->pf.error_if_no_space = \ pt->requested_pf.error_if_no_space // timeout, pool is still in OODS mode do_no_space_timeout // "queue_if_no_space" config is lost pool->pf.error_if_no_space = true pool->pf.mode = new_mode Fix it by stopping no_space_timeout worker when switching to write mode. Fixes: bcc696fac11f ("dm thin: stay in out-of-data-space mode once no_space_timeout expires") Cc: stable@vger.kernel.org Signed-off-by: Hou Tao Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 72ae5dc50532..6cf9ad4e4e16 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2514,6 +2514,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_WRITE: if (old_mode != new_mode) notify_of_pool_mode_change(pool, "write"); + if (old_mode == PM_OUT_OF_DATA_SPACE) + cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space; dm_pool_metadata_read_write(pool->pmd); -- GitLab From b7227e6044be9e0a0617346cd54baa384298b18b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 2 Aug 2018 16:08:52 -0400 Subject: [PATCH 1232/1291] dm cache metadata: save in-core policy_hint_size to on-disk superblock commit fd2fa95416188a767a63979296fa3e169a9ef5ec upstream. policy_hint_size starts as 0 during __write_initial_superblock(). It isn't until the policy is loaded that policy_hint_size is set in-core (cmd->policy_hint_size). But it never got recorded in the on-disk superblock because __commit_transaction() didn't deal with transfering the in-core cmd->policy_hint_size to the on-disk superblock. The in-core cmd->policy_hint_size gets initialized by metadata_open()'s __begin_transaction_flags() which re-reads all superblock fields. Because the superblock's policy_hint_size was never properly stored, when the cache was created, hints_array_available() would always return false when re-activating a previously created cache. This means __load_mappings() always considered the hints invalid and never made use of the hints (these hints served to optimize). Another detremental side-effect of this oversight is the cache_check utility would fail with: "invalid hint width: 0" Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 4a4e9c75fc4c..98f58eeb7dc5 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -362,7 +362,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->version = cpu_to_le32(cmd->version); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); - disk_super->policy_hint_size = 0; + disk_super->policy_hint_size = cpu_to_le32(0); __copy_sm_root(cmd, disk_super); @@ -700,6 +700,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); + disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); -- GitLab From f961be8944130e94ce32c662a748e6f386d666ca Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Aug 2018 12:38:28 +0200 Subject: [PATCH 1233/1291] dm cache metadata: set dirty on all cache blocks after a crash commit 5b1fe7bec8a8d0cc547a22e7ddc2bd59acd67de4 upstream. Quoting Documentation/device-mapper/cache.txt: The 'dirty' state for a cache block changes far too frequently for us to keep updating it on the fly. So we treat it as a hint. In normal operation it will be written when the dm device is suspended. If the system crashes all cache blocks will be assumed dirty when restarted. This got broken in commit f177940a8091 ("dm cache metadata: switch to using the new cursor api for loading metadata") in 4.9, which removed the code that consulted cmd->clean_when_opened (CLEAN_SHUTDOWN on-disk flag) when loading cache blocks. This results in data corruption on an unclean shutdown with dirty cache blocks on the fast device. After the crash those blocks are considered clean and may get evicted from the cache at any time. This can be demonstrated by doing a lot of reads to trigger individual evictions, but uncache is more predictable: ### Disable auto-activation in lvm.conf to be able to do uncache in ### time (i.e. see uncache doing flushing) when the fix is applied. # xfs_io -d -c 'pwrite -b 4M -S 0xaa 0 1G' /dev/vdb # vgcreate vg_cache /dev/vdb /dev/vdc # lvcreate -L 1G -n lv_slowdev vg_cache /dev/vdb # lvcreate -L 512M -n lv_cachedev vg_cache /dev/vdc # lvcreate -L 256M -n lv_metadev vg_cache /dev/vdc # lvconvert --type cache-pool --cachemode writeback vg_cache/lv_cachedev --poolmetadata vg_cache/lv_metadev # lvconvert --type cache vg_cache/lv_slowdev --cachepool vg_cache/lv_cachedev # xfs_io -d -c 'pwrite -b 4M -S 0xbb 0 512M' /dev/mapper/vg_cache-lv_slowdev # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ # dmsetup status vg_cache-lv_slowdev 0 2097152 cache 8 27/65536 128 8192/8192 1 100 0 0 0 8192 7065 2 metadata2 writeback 2 migration_threshold 2048 smq 0 rw - ^^^^ 7065 * 64k = 441M yet to be written to the slow device # echo b >/proc/sysrq-trigger # vgchange -ay vg_cache # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ # lvconvert --uncache vg_cache/lv_slowdev Flushing 0 blocks for cache vg_cache/lv_slowdev. Logical volume "lv_cachedev" successfully removed Logical volume vg_cache/lv_slowdev is not cached. # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ................ 0fe00010: aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ................ This is the case with both v1 and v2 cache pool metatata formats. After applying this patch: # vgchange -ay vg_cache # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ # lvconvert --uncache vg_cache/lv_slowdev Flushing 3724 blocks for cache vg_cache/lv_slowdev. ... Flushing 71 blocks for cache vg_cache/lv_slowdev. Logical volume "lv_cachedev" successfully removed Logical volume vg_cache/lv_slowdev is not cached. # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ Cc: stable@vger.kernel.org Fixes: f177940a8091 ("dm cache metadata: switch to using the new cursor api for loading metadata") Signed-off-by: Ilya Dryomov Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 98f58eeb7dc5..0a5a45f3ec5f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1322,6 +1322,7 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1332,8 +1333,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = flags & M_DIRTY; - r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, + r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { DMERR("policy couldn't load cache block %llu", @@ -1361,7 +1364,7 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; - bool dirty; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1372,8 +1375,9 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = dm_bitset_cursor_get_value(dirty_cursor); - dirty = dm_bitset_cursor_get_value(dirty_cursor); r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { -- GitLab From 5044eb05026e9138ad3a31879bc752009f9248eb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Aug 2018 11:23:56 -0400 Subject: [PATCH 1234/1291] dm crypt: don't decrease device limits commit bc9e9cf0401f18e33b78d4c8a518661b8346baf7 upstream. dm-crypt should only increase device limits, it should not decrease them. This fixes a bug where the user could creates a crypt device with 1024 sector size on the top of scsi device that had 4096 logical block size. The limit 4096 would be lost and the user could incorrectly send 1024-I/Os to the crypt device. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f575110454b6..c60d29d09687 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3072,11 +3072,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) */ limits->max_segment_size = PAGE_SIZE; - if (cc->sector_size != (1 << SECTOR_SHIFT)) { - limits->logical_block_size = cc->sector_size; - limits->physical_block_size = cc->sector_size; - blk_limits_io_min(limits, cc->sector_size); - } + limits->logical_block_size = + max_t(unsigned short, limits->logical_block_size, cc->sector_size); + limits->physical_block_size = + max_t(unsigned, limits->physical_block_size, cc->sector_size); + limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); } static struct target_type crypt_target = { -- GitLab From d286cfd4527711d7711674b4214099c42d216e85 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Fri, 6 Jul 2018 10:24:57 -0600 Subject: [PATCH 1235/1291] uart: fix race between uart_put_char() and uart_shutdown() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a5ba1d95e46ecaea638ddd7cd144107c783acb5d upstream. We have reports of the following crash: PID: 7 TASK: ffff88085c6d61c0 CPU: 1 COMMAND: "kworker/u25:0" #0 [ffff88085c6db710] machine_kexec at ffffffff81046239 #1 [ffff88085c6db760] crash_kexec at ffffffff810fc248 #2 [ffff88085c6db830] oops_end at ffffffff81008ae7 #3 [ffff88085c6db860] no_context at ffffffff81050b8f #4 [ffff88085c6db8b0] __bad_area_nosemaphore at ffffffff81050d75 #5 [ffff88085c6db900] bad_area_nosemaphore at ffffffff81050e83 #6 [ffff88085c6db910] __do_page_fault at ffffffff8105132e #7 [ffff88085c6db9b0] do_page_fault at ffffffff8105152c #8 [ffff88085c6db9c0] page_fault at ffffffff81a3f122 [exception RIP: uart_put_char+149] RIP: ffffffff814b67b5 RSP: ffff88085c6dba78 RFLAGS: 00010006 RAX: 0000000000000292 RBX: ffffffff827c5120 RCX: 0000000000000081 RDX: 0000000000000000 RSI: 000000000000005f RDI: ffffffff827c5120 RBP: ffff88085c6dba98 R8: 000000000000012c R9: ffffffff822ea320 R10: ffff88085fe4db04 R11: 0000000000000001 R12: ffff881059f9c000 R13: 0000000000000001 R14: 000000000000005f R15: 0000000000000fba ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff88085c6dbaa0] tty_put_char at ffffffff81497544 #10 [ffff88085c6dbac0] do_output_char at ffffffff8149c91c #11 [ffff88085c6dbae0] __process_echoes at ffffffff8149cb8b #12 [ffff88085c6dbb30] commit_echoes at ffffffff8149cdc2 #13 [ffff88085c6dbb60] n_tty_receive_buf_fast at ffffffff8149e49b #14 [ffff88085c6dbbc0] __receive_buf at ffffffff8149ef5a #15 [ffff88085c6dbc20] n_tty_receive_buf_common at ffffffff8149f016 #16 [ffff88085c6dbca0] n_tty_receive_buf2 at ffffffff8149f194 #17 [ffff88085c6dbcb0] flush_to_ldisc at ffffffff814a238a #18 [ffff88085c6dbd50] process_one_work at ffffffff81090be2 #19 [ffff88085c6dbe20] worker_thread at ffffffff81091b4d #20 [ffff88085c6dbeb0] kthread at ffffffff81096384 #21 [ffff88085c6dbf50] ret_from_fork at ffffffff81a3d69f​ after slogging through some dissasembly: ffffffff814b6720 : ffffffff814b6720: 55 push %rbp ffffffff814b6721: 48 89 e5 mov %rsp,%rbp ffffffff814b6724: 48 83 ec 20 sub $0x20,%rsp ffffffff814b6728: 48 89 1c 24 mov %rbx,(%rsp) ffffffff814b672c: 4c 89 64 24 08 mov %r12,0x8(%rsp) ffffffff814b6731: 4c 89 6c 24 10 mov %r13,0x10(%rsp) ffffffff814b6736: 4c 89 74 24 18 mov %r14,0x18(%rsp) ffffffff814b673b: e8 b0 8e 58 00 callq ffffffff81a3f5f0 ffffffff814b6740: 4c 8b a7 88 02 00 00 mov 0x288(%rdi),%r12 ffffffff814b6747: 45 31 ed xor %r13d,%r13d ffffffff814b674a: 41 89 f6 mov %esi,%r14d ffffffff814b674d: 49 83 bc 24 70 01 00 cmpq $0x0,0x170(%r12) ffffffff814b6754: 00 00 ffffffff814b6756: 49 8b 9c 24 80 01 00 mov 0x180(%r12),%rbx ffffffff814b675d: 00 ffffffff814b675e: 74 2f je ffffffff814b678f ffffffff814b6760: 48 89 df mov %rbx,%rdi ffffffff814b6763: e8 a8 67 58 00 callq ffffffff81a3cf10 <_raw_spin_lock_irqsave> ffffffff814b6768: 41 8b 8c 24 78 01 00 mov 0x178(%r12),%ecx ffffffff814b676f: 00 ffffffff814b6770: 89 ca mov %ecx,%edx ffffffff814b6772: f7 d2 not %edx ffffffff814b6774: 41 03 94 24 7c 01 00 add 0x17c(%r12),%edx ffffffff814b677b: 00 ffffffff814b677c: 81 e2 ff 0f 00 00 and $0xfff,%edx ffffffff814b6782: 75 23 jne ffffffff814b67a7 ffffffff814b6784: 48 89 c6 mov %rax,%rsi ffffffff814b6787: 48 89 df mov %rbx,%rdi ffffffff814b678a: e8 e1 64 58 00 callq ffffffff81a3cc70 <_raw_spin_unlock_irqrestore> ffffffff814b678f: 44 89 e8 mov %r13d,%eax ffffffff814b6792: 48 8b 1c 24 mov (%rsp),%rbx ffffffff814b6796: 4c 8b 64 24 08 mov 0x8(%rsp),%r12 ffffffff814b679b: 4c 8b 6c 24 10 mov 0x10(%rsp),%r13 ffffffff814b67a0: 4c 8b 74 24 18 mov 0x18(%rsp),%r14 ffffffff814b67a5: c9 leaveq ffffffff814b67a6: c3 retq ffffffff814b67a7: 49 8b 94 24 70 01 00 mov 0x170(%r12),%rdx ffffffff814b67ae: 00 ffffffff814b67af: 48 63 c9 movslq %ecx,%rcx ffffffff814b67b2: 41 b5 01 mov $0x1,%r13b ffffffff814b67b5: 44 88 34 0a mov %r14b,(%rdx,%rcx,1) ffffffff814b67b9: 41 8b 94 24 78 01 00 mov 0x178(%r12),%edx ffffffff814b67c0: 00 ffffffff814b67c1: 83 c2 01 add $0x1,%edx ffffffff814b67c4: 81 e2 ff 0f 00 00 and $0xfff,%edx ffffffff814b67ca: 41 89 94 24 78 01 00 mov %edx,0x178(%r12) ffffffff814b67d1: 00 ffffffff814b67d2: eb b0 jmp ffffffff814b6784 ffffffff814b67d4: 66 66 66 2e 0f 1f 84 data32 data32 nopw %cs:0x0(%rax,%rax,1) ffffffff814b67db: 00 00 00 00 00 for our build, this is crashing at: circ->buf[circ->head] = c; Looking in uart_port_startup(), it seems that circ->buf (state->xmit.buf) protected by the "per-port mutex", which based on uart_port_check() is state->port.mutex. Indeed, the lock acquired in uart_put_char() is uport->lock, i.e. not the same lock. Anyway, since the lock is not acquired, if uart_shutdown() is called, the last chunk of that function may release state->xmit.buf before its assigned to null, and cause the race above. To fix it, let's lock uport->lock when allocating/deallocating state->xmit.buf in addition to the per-port mutex. v2: switch to locking uport->lock on allocation/deallocation instead of locking the per-port mutex in uart_put_char. Note that since uport->lock is a spin lock, we have to switch the allocation to GFP_ATOMIC. v3: move the allocation outside the lock, so we can switch back to GFP_KERNEL Signed-off-by: Tycho Andersen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c8cb0b398cb1..6db8844ef3ec 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -195,6 +195,7 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, { struct uart_port *uport = uart_port_check(state); unsigned long page; + unsigned long flags = 0; int retval = 0; if (uport->type == PORT_UNKNOWN) @@ -209,15 +210,18 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, * Initialise and allocate the transmit and temporary * buffer. */ - if (!state->xmit.buf) { - /* This is protected by the per port mutex */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + uart_port_lock(state, flags); + if (!state->xmit.buf) { state->xmit.buf = (unsigned char *) page; uart_circ_clear(&state->xmit); + } else { + free_page(page); } + uart_port_unlock(uport, flags); retval = uport->ops->startup(uport); if (retval == 0) { @@ -276,6 +280,7 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) { struct uart_port *uport = uart_port_check(state); struct tty_port *port = &state->port; + unsigned long flags = 0; /* * Set the TTY IO error marker @@ -308,10 +313,12 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) /* * Free the transmit buffer page. */ + uart_port_lock(state, flags); if (state->xmit.buf) { free_page((unsigned long)state->xmit.buf); state->xmit.buf = NULL; } + uart_port_unlock(uport, flags); } /** -- GitLab From 91b48a9ced0683d0f69385a2d08b70574603fb27 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 2 Aug 2018 03:08:23 +0000 Subject: [PATCH 1236/1291] Drivers: hv: vmbus: Reset the channel callback in vmbus_onoffer_rescind() commit d3b26dd7cb0e3433bfd3c1d4dcf74c6039bb49fb upstream. Before setting channel->rescind in vmbus_rescind_cleanup(), we should make sure the channel callback won't run any more, otherwise a high-level driver like pci_hyperv, which may be infinitely waiting for the host VSP's response and notices the channel has been rescinded, can't safely give up: e.g., in hv_pci_protocol_negotiation() -> wait_for_response(), it's unsafe to exit from wait_for_response() and proceed with the on-stack variable "comp_pkt" popped. The issue was originally spotted by Michael Kelley . In vmbus_close_internal(), the patch also minimizes the range protected by disabling/enabling channel->callback_event: we don't really need that for the whole function. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Stephen Hemminger Cc: Michael Kelley Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 40 +++++++++++++++++++++++---------------- drivers/hv/channel_mgmt.c | 6 ++++++ include/linux/hyperv.h | 2 ++ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 05964347008d..d96b09fea835 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -541,11 +541,8 @@ static void reset_channel_cb(void *arg) channel->onchannel_callback = NULL; } -static int vmbus_close_internal(struct vmbus_channel *channel) +void vmbus_reset_channel_cb(struct vmbus_channel *channel) { - struct vmbus_channel_close_channel *msg; - int ret; - /* * vmbus_on_event(), running in the per-channel tasklet, can race * with vmbus_close_internal() in the case of SMP guest, e.g., when @@ -555,6 +552,29 @@ static int vmbus_close_internal(struct vmbus_channel *channel) */ tasklet_disable(&channel->callback_event); + channel->sc_creation_callback = NULL; + + /* Stop the callback asap */ + if (channel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(channel->target_cpu, reset_channel_cb, + channel, true); + } else { + reset_channel_cb(channel); + put_cpu(); + } + + /* Re-enable tasklet for use on re-open */ + tasklet_enable(&channel->callback_event); +} + +static int vmbus_close_internal(struct vmbus_channel *channel) +{ + struct vmbus_channel_close_channel *msg; + int ret; + + vmbus_reset_channel_cb(channel); + /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is @@ -568,16 +588,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) } channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ - if (channel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(channel->target_cpu, reset_channel_cb, - channel, true); - } else { - reset_channel_cb(channel); - put_cpu(); - } /* Send a closing message */ @@ -620,8 +630,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); out: - /* re-enable tasklet for use on re-open */ - tasklet_enable(&channel->callback_event); return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1939c0ca3741..1700b4e7758d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -881,6 +881,12 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) return; } + /* + * Before setting channel->rescind in vmbus_rescind_cleanup(), we + * should make sure the channel callback is not running any more. + */ + vmbus_reset_channel_cb(channel); + /* * Now wait for offer handling to complete. */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ba74eaa8eadf..0c51f753652d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1026,6 +1026,8 @@ extern int vmbus_establish_gpadl(struct vmbus_channel *channel, extern int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle); +void vmbus_reset_channel_cb(struct vmbus_channel *channel); + extern int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, u32 bufferlen, -- GitLab From b86374912fee81fc266355b2a911cbbb92ffff54 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 7 Jul 2018 12:44:01 -0500 Subject: [PATCH 1237/1291] iio: sca3000: Fix missing return in switch commit c5b974bee9d2ceae4c441ae5a01e498c2674e100 upstream. The IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY case is missing a return and will fall through to the default case and errorenously return -EINVAL. Fix this by adding in missing *return ret*. Fixes: 626f971b5b07 ("staging:iio:accel:sca3000 Add write support to the low pass filter control") Reported-by: Jonathan Cameron Signed-off-by: Gustavo A. R. Silva Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/sca3000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c index 565f7d8d3304..f2761b385541 100644 --- a/drivers/iio/accel/sca3000.c +++ b/drivers/iio/accel/sca3000.c @@ -797,6 +797,7 @@ static int sca3000_write_raw(struct iio_dev *indio_dev, mutex_lock(&st->lock); ret = sca3000_write_3db_freq(st, val); mutex_unlock(&st->lock); + return ret; default: return -EINVAL; } -- GitLab From e4d3a25111dc6f907a78bf1d333af10b09bc7e59 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 25 Jun 2018 11:03:07 +0300 Subject: [PATCH 1238/1291] iio: ad9523: Fix displayed phase commit 5a4e33c1c53ae7d4425f7d94e60e4458a37b349e upstream. Fix the displayed phase for the ad9523 driver. Currently the most significant decimal place is dropped and all other digits are shifted one to the left. This is due to a multiplication by 10, which is not necessary, so remove it. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes: cd1678f9632 ("iio: frequency: New driver for AD9523 SPI Low Jitter Clock Generator") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/ad9523.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index 99eba524f6dd..cb45fa314a92 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -642,7 +642,7 @@ static int ad9523_read_raw(struct iio_dev *indio_dev, code = (AD9523_CLK_DIST_DIV_PHASE_REV(ret) * 3141592) / AD9523_CLK_DIST_DIV_REV(ret); *val = code / 1000000; - *val2 = (code % 1000000) * 10; + *val2 = code % 1000000; return IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; -- GitLab From 3f9481902f0b65292addc62771e2c6f59149044d Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 27 Jul 2018 09:42:45 +0300 Subject: [PATCH 1239/1291] iio: ad9523: Fix return value for ad952x_store() commit 9a5094ca29ea9b1da301b31fd377c0c0c4c23034 upstream. A sysfs write callback function needs to either return the number of consumed characters or an error. The ad952x_store() function currently returns 0 if the input value was "0", this will signal that no characters have been consumed and the function will be called repeatedly in a loop indefinitely. Fix this by returning number of supplied characters to indicate that the whole input string has been consumed. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes: cd1678f96329 ("iio: frequency: New driver for AD9523 SPI Low Jitter Clock Generator") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/ad9523.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index cb45fa314a92..1642b55f70da 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -508,7 +508,7 @@ static ssize_t ad9523_store(struct device *dev, return ret; if (!state) - return 0; + return len; mutex_lock(&indio_dev->mlock); switch ((u32)this_attr->address) { -- GitLab From c0a8e047734ce3dcce51715108ff666829fa539e Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 14 Jun 2018 11:16:29 +0900 Subject: [PATCH 1240/1291] extcon: Release locking when sending the notification of connector state commit 8a9dbb779fe882325b9a0238494a7afaff2eb444 upstream. Previously, extcon used the spinlock before calling the notifier_call_chain to prevent the scheduled out of task and to prevent the notification delay. When spinlock is locked for sending the notification, deadlock issue occured on the side of extcon consumer device. To fix this issue, extcon consumer device should always use the work. it is always not reasonable to use work. To fix this issue on extcon consumer device, release locking when sending the notification of connector state. Fixes: ab11af049f88 ("extcon: Add the synchronization extcon APIs to support the notification") Cc: stable@vger.kernel.org Cc: Roger Quadros Cc: Kishon Vijay Abraham I Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 35e9fb885486..95e96f04bf6f 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -433,8 +433,8 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) return index; spin_lock_irqsave(&edev->lock, flags); - state = !!(edev->state & BIT(index)); + spin_unlock_irqrestore(&edev->lock, flags); /* * Call functions in a raw notifier chain for the specific one @@ -448,6 +448,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) */ raw_notifier_call_chain(&edev->nh_all, state, edev); + spin_lock_irqsave(&edev->lock, flags); /* This could be in interrupt handler */ prop_buf = (char *)get_zeroed_page(GFP_ATOMIC); if (!prop_buf) { -- GitLab From 9fd44e90903c0f8eb3bee8160280ebf749e992a9 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:24 -0700 Subject: [PATCH 1241/1291] vmw_balloon: fix inflation of 64-bit GFNs commit 09755690c6b7c1eabdc4651eb3b276f8feb1e447 upstream. When balloon batching is not supported by the hypervisor, the guest frame number (GFN) must fit in 32-bit. However, due to a bug, this check was mistakenly ignored. In practice, when total RAM is greater than 16TB, the balloon does not work currently, making this bug unlikely to happen. Fixes: ef0f8f112984 ("VMware balloon: partially inline vmballoon_reserve_page.") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 56c6f79a5c5a..f31b2f6fde22 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -450,7 +450,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, pfn32 = (u32)pfn; if (pfn32 != pfn) - return -1; + return -EINVAL; STATS_INC(b->stats.lock[false]); @@ -460,7 +460,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); STATS_INC(b->stats.lock_fail[false]); - return 1; + return -EIO; } static int vmballoon_send_batched_lock(struct vmballoon *b, @@ -597,11 +597,12 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, target); - if (locked > 0) { + if (locked) { STATS_INC(b->stats.refused_alloc[false]); - if (hv_status == VMW_BALLOON_ERROR_RESET || - hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { + if (locked == -EIO && + (hv_status == VMW_BALLOON_ERROR_RESET || + hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) { vmballoon_free_page(page, false); return -EIO; } @@ -617,7 +618,7 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, } else { vmballoon_free_page(page, false); } - return -EIO; + return locked; } /* track allocated page */ -- GitLab From d3b403844db50f87052d4e0387f384e24e052f44 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:25 -0700 Subject: [PATCH 1242/1291] vmw_balloon: do not use 2MB without batching commit 5081efd112560d3febb328e627176235b250d59d upstream. If the hypervisor sets 2MB batching is on, while batching is cleared, the balloon code breaks. In this case the legacy mechanism is used with 2MB page. The VM would report a 2MB page is ballooned, and the hypervisor would only take the first 4KB. While the hypervisor should not report such settings, make the code more robust by not enabling 2MB support without batching. Fixes: 365bd7ef7ec8e ("VMware balloon: Support 2m page ballooning.") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index f31b2f6fde22..4967cebddf48 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -341,7 +341,13 @@ static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) success = false; } - if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) + /* + * 2MB pages are only supported with batching. If batching is for some + * reason disabled, do not use 2MB pages, since otherwise the legacy + * mechanism is used with 2MB pages, causing a failure. + */ + if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && + (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) b->supported_page_sizes = 2; else b->supported_page_sizes = 1; -- GitLab From 89667b269e8716390fe558ab35e82d65013b13cd Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:26 -0700 Subject: [PATCH 1243/1291] vmw_balloon: VMCI_DOORBELL_SET does not check status commit ce664331b2487a5d244a51cbdd8cb54f866fbe5d upstream. When vmballoon_vmci_init() sets a doorbell using VMCI_DOORBELL_SET, for some reason it does not consider the status and looks at the result. However, the hypervisor does not update the result - it updates the status. This might cause VMCI doorbell not to be enabled, resulting in degraded performance. Fixes: 48e3d668b790 ("VMware balloon: Enable notification via VMCI") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 4967cebddf48..fe56e72f9463 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1036,29 +1036,30 @@ static void vmballoon_vmci_cleanup(struct vmballoon *b) */ static int vmballoon_vmci_init(struct vmballoon *b) { - int error = 0; + unsigned long error, dummy; - if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) { - error = vmci_doorbell_create(&b->vmci_doorbell, - VMCI_FLAG_DELAYED_CB, - VMCI_PRIVILEGE_FLAG_RESTRICTED, - vmballoon_doorbell, b); - - if (error == VMCI_SUCCESS) { - VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, - b->vmci_doorbell.context, - b->vmci_doorbell.resource, error); - STATS_INC(b->stats.doorbell_set); - } - } + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) + return 0; - if (error != 0) { - vmballoon_vmci_cleanup(b); + error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); - return -EIO; - } + if (error != VMCI_SUCCESS) + goto fail; + + error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context, + b->vmci_doorbell.resource, dummy); + + STATS_INC(b->stats.doorbell_set); + + if (error != VMW_BALLOON_SUCCESS) + goto fail; return 0; +fail: + vmballoon_vmci_cleanup(b); + return -EIO; } /* -- GitLab From bbac5374b537ad81ce70d8434e1364b549e3d4a1 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:27 -0700 Subject: [PATCH 1244/1291] vmw_balloon: fix VMCI use when balloon built into kernel commit c3cc1b0fc27508da53fe955a3b23d03964410682 upstream. Currently, when all modules, including VMCI and VMware balloon are built into the kernel, the initialization of the balloon happens before the VMCI is probed. As a result, the balloon fails to initialize the VMCI doorbell, which it uses to get asynchronous requests for balloon size changes. The problem can be seen in the logs, in the form of the following message: "vmw_balloon: failed to initialize vmci doorbell" The driver would work correctly but slightly less efficiently, probing for requests periodically. This patch changes the balloon to be initialized using late_initcall() instead of module_init() to address this issue. It does not address a situation in which VMCI is built as a module and the balloon is built into the kernel. Fixes: 48e3d668b790 ("VMware balloon: Enable notification via VMCI") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index fe56e72f9463..5f8b583c6e41 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1297,7 +1297,14 @@ static int __init vmballoon_init(void) return 0; } -module_init(vmballoon_init); + +/* + * Using late_initcall() instead of module_init() allows the balloon to use the + * VMCI doorbell even when the balloon is built into the kernel. Otherwise the + * VMCI is probed only after the balloon is initialized. If the balloon is used + * as a module, late_initcall() is equivalent to module_init(). + */ +late_initcall(vmballoon_init); static void __exit vmballoon_exit(void) { -- GitLab From 2b4c940dccbe29029c75a4bf7016f864425057a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 11:05:55 +0200 Subject: [PATCH 1245/1291] rtc: omap: fix potential crash on power off commit 5c8b84f410b3819d14cb1ebf32e4b3714b5a6e0b upstream. Do not set the system power-off callback and omap power-off rtc pointer until we're done setting up our device to avoid leaving stale pointers around after a late probe error. Fixes: 97ea1906b3c2 ("rtc: omap: Support ext_wakeup configuration") Cc: stable # 4.9 Cc: Marcin Niestroj Cc: Tony Lindgren Signed-off-by: Johan Hovold Acked-by: Tony Lindgren Reviewed-by: Marcin Niestroj Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-omap.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 13f7cd11c07e..ac6e6a6a194c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -817,13 +817,6 @@ static int omap_rtc_probe(struct platform_device *pdev) goto err; } - if (rtc->is_pmic_controller) { - if (!pm_power_off) { - omap_rtc_power_off_rtc = rtc; - pm_power_off = omap_rtc_power_off; - } - } - /* Support ext_wakeup pinconf */ rtc_pinctrl_desc.name = dev_name(&pdev->dev); @@ -833,6 +826,13 @@ static int omap_rtc_probe(struct platform_device *pdev) return PTR_ERR(rtc->pctldev); } + if (rtc->is_pmic_controller) { + if (!pm_power_off) { + omap_rtc_power_off_rtc = rtc; + pm_power_off = omap_rtc_power_off; + } + } + return 0; err: -- GitLab From 4c9016757e3bf695a6716946d7fd5f0f486a475a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 1 Aug 2018 15:40:57 -0400 Subject: [PATCH 1246/1291] tracing: Do not call start/stop() functions when tracing_on does not change commit f143641bfef9a4a60c57af30de26c63057e7e695 upstream. Currently, when one echo's in 1 into tracing_on, the current tracer's "start()" function is executed, even if tracing_on was already one. This can lead to strange side effects. One being that if the hwlat tracer is enabled, and someone does "echo 1 > tracing_on" into tracing_on, the hwlat tracer's start() function is called again which will recreate another kernel thread, and make it unable to remove the old one. Link: http://lkml.kernel.org/r/1533120354-22923-1-git-send-email-erica.bugden@linutronix.de Cc: stable@vger.kernel.org Fixes: 2df8f8a6a897e ("tracing: Fix regression with irqsoff tracer and tracing_on file") Reported-by: Erica Bugden Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b7302c37c064..e9cbb96cd99e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7545,7 +7545,9 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); - if (val) { + if (!!val == tracer_tracing_is_on(tr)) { + val = 0; /* do nothing */ + } else if (val) { tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); -- GitLab From cbde057aa0e7b2fcca06e7f6ba28b8d76a041ce9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 16 Aug 2018 16:08:37 -0400 Subject: [PATCH 1247/1291] tracing/blktrace: Fix to allow setting same value commit 757d9140072054528b13bbe291583d9823cde195 upstream. Masami Hiramatsu reported: Current trace-enable attribute in sysfs returns an error if user writes the same setting value as current one, e.g. # cat /sys/block/sda/trace/enable 0 # echo 0 > /sys/block/sda/trace/enable bash: echo: write error: Invalid argument # echo 1 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable bash: echo: write error: Device or resource busy But this is not a preferred behavior, it should ignore if new setting is same as current one. This fixes the problem as below. # cat /sys/block/sda/trace/enable 0 # echo 0 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable Link: http://lkml.kernel.org/r/20180816103802.08678002@gandalf.local.home Cc: Ingo Molnar Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: stable@vger.kernel.org Fixes: cd649b8bb830d ("blktrace: remove sysfs_blk_trace_enable_show/store()") Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- kernel/trace/blktrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e73dcab8e9f0..71a8ee6e60dc 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1809,6 +1809,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&q->blk_trace_mutex); if (attr == &dev_attr_enable) { + if (!!value == !!q->blk_trace) { + ret = 0; + goto out_unlock_bdev; + } if (value) ret = blk_trace_setup_queue(q, bdev); else -- GitLab From 68a735eb9a16147a85024c47e6176fe4599b91ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Sep 2018 16:29:49 -0400 Subject: [PATCH 1248/1291] printk/tracing: Do not trace printk_nmi_enter() commit d1c392c9e2a301f38998a353f467f76414e38725 upstream. I hit the following splat in my tests: ------------[ cut here ]------------ IRQs not enabled as expected WARNING: CPU: 3 PID: 0 at kernel/time/tick-sched.c:982 tick_nohz_idle_enter+0x44/0x8c Modules linked in: ip6t_REJECT nf_reject_ipv6 ip6table_filter ip6_tables ipv6 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.19.0-rc2-test+ #2 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 EIP: tick_nohz_idle_enter+0x44/0x8c Code: ec 05 00 00 00 75 26 83 b8 c0 05 00 00 00 75 1d 80 3d d0 36 3e c1 00 75 14 68 94 63 12 c1 c6 05 d0 36 3e c1 01 e8 04 ee f8 ff <0f> 0b 58 fa bb a0 e5 66 c1 e8 25 0f 04 00 64 03 1d 28 31 52 c1 8b EAX: 0000001c EBX: f26e7f8c ECX: 00000006 EDX: 00000007 ESI: f26dd1c0 EDI: 00000000 EBP: f26e7f40 ESP: f26e7f38 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010296 CR0: 80050033 CR2: 0813c6b0 CR3: 2f342000 CR4: 001406f0 Call Trace: do_idle+0x33/0x202 cpu_startup_entry+0x61/0x63 start_secondary+0x18e/0x1ed startup_32_smp+0x164/0x168 irq event stamp: 18773830 hardirqs last enabled at (18773829): [] trace_hardirqs_on_thunk+0xc/0x10 hardirqs last disabled at (18773830): [] trace_hardirqs_off_thunk+0xc/0x10 softirqs last enabled at (18773824): [] __do_softirq+0x25f/0x2bf softirqs last disabled at (18773767): [] call_on_stack+0x45/0x4b ---[ end trace b7c64aa79e17954a ]--- After a bit of debugging, I found what was happening. This would trigger when performing "perf" with a high NMI interrupt rate, while enabling and disabling function tracer. Ftrace uses breakpoints to convert the nops at the start of functions to calls to the function trampolines. The breakpoint traps disable interrupts and this makes calls into lockdep via the trace_hardirqs_off_thunk in the entry.S code. What happens is the following: do_idle { [interrupts enabled] [interrupts disabled] TRACE_IRQS_OFF [lockdep says irqs off] [...] TRACE_IRQS_IRET test if pt_regs say return to interrupts enabled [yes] TRACE_IRQS_ON [lockdep says irqs are on] nmi_enter() { printk_nmi_enter() [traced by ftrace] [ hit ftrace breakpoint ] TRACE_IRQS_OFF [lockdep says irqs off] [...] TRACE_IRQS_IRET [return from breakpoint] test if pt_regs say interrupts enabled [no] [iret back to interrupt] [iret back to code] tick_nohz_idle_enter() { lockdep_assert_irqs_enabled() [lockdep say no!] Although interrupts are indeed enabled, lockdep thinks it is not, and since we now do asserts via lockdep, it gives a false warning. The issue here is that printk_nmi_enter() is called before lockdep_off(), which disables lockdep (for this reason) in NMIs. By simply not allowing ftrace to see printk_nmi_enter() (via notrace annotation) we keep lockdep from getting confused. Cc: stable@vger.kernel.org Fixes: 42a0bb3f71383 ("printk/nmi: generic solution for safe printk in NMI") Acked-by: Sergey Senozhatsky Acked-by: Petr Mladek Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk_safe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index d482fd61ac67..64f8046586b6 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -309,12 +309,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) return printk_safe_log_store(s, fmt, args); } -void printk_nmi_enter(void) +void notrace printk_nmi_enter(void) { this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); } -void printk_nmi_exit(void) +void notrace printk_nmi_exit(void) { this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); } -- GitLab From a36e2aa90576263341b3179d47cfe733be7a8085 Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Fri, 20 Jul 2018 15:16:42 +0530 Subject: [PATCH 1249/1291] livepatch: Validate module/old func name length commit 6e9df95b76cad18f7b217bdad7bb8a26d63b8c47 upstream. livepatch module author can pass module name/old function name with more than the defined character limit. With obj->name length greater than MODULE_NAME_LEN, the livepatch module gets loaded but waits forever on the module specified by obj->name to be loaded. It also populates a /sys directory with an untruncated object name. In the case of funcs->old_name length greater then KSYM_NAME_LEN, it would not match against any of the symbol table entries. Instead loop through the symbol table comparing them against a nonexisting function, which can be avoided. The same issues apply, to misspelled/incorrect names. At least gatekeep the modules with over the limit string length, by checking for their length during livepatch module registration. Cc: stable@vger.kernel.org Signed-off-by: Kamalesh Babulal Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- kernel/livepatch/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bf8c8fd72589..7c51f065b212 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -605,6 +605,9 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) if (!func->old_name || !func->new_func) return -EINVAL; + if (strlen(func->old_name) >= KSYM_NAME_LEN) + return -EINVAL; + INIT_LIST_HEAD(&func->stack_node); func->patched = false; func->transition = false; @@ -678,6 +681,9 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) if (!obj->funcs) return -EINVAL; + if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN) + return -EINVAL; + obj->patched = false; obj->mod = NULL; -- GitLab From 4f6789cad6472a09200a60928ef6c56ae457ed38 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 9 Aug 2018 15:37:59 -0400 Subject: [PATCH 1250/1291] uprobes: Use synchronize_rcu() not synchronize_sched() commit 016f8ffc48cb01d1e7701649c728c5d2e737d295 upstream. While debugging another bug, I was looking at all the synchronize*() functions being used in kernel/trace, and noticed that trace_uprobes was using synchronize_sched(), with a comment to synchronize with {u,ret}_probe_trace_func(). When looking at those functions, the data is protected with "rcu_read_lock()" and not with "rcu_read_lock_sched()". This is using the wrong synchronize_*() function. Link: http://lkml.kernel.org/r/20180809160553.469e1e32@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 70ed91c6ec7f8 ("tracing/uprobes: Support ftrace_event_file base multibuffer") Acked-by: Oleg Nesterov Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7197ff9f0bbd..ea0d90a31fc9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -967,7 +967,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) list_del_rcu(&link->list); /* synchronize with u{,ret}probe_trace_func */ - synchronize_sched(); + synchronize_rcu(); kfree(link); if (!list_empty(&tu->tp.files)) -- GitLab From aa9ceea207882ccc983c6cda4db37acbb391f7e4 Mon Sep 17 00:00:00 2001 From: Rafael David Tinoco Date: Fri, 6 Jul 2018 14:28:33 -0300 Subject: [PATCH 1251/1291] mfd: hi655x: Fix regmap area declared size for hi655x commit 6afebb70ee7a4bde106dc1a875e7ac7997248f84 upstream. Fixes https://bugs.linaro.org/show_bug.cgi?id=3903 LTP Functional tests have caused a bad paging request when triggering the regmap_read_debugfs() logic of the device PMIC Hi6553 (reading regmap/f8000000.pmic/registers file during read_all test): Unable to handle kernel paging request at virtual address ffff0 [ffff00000984e000] pgd=0000000077ffe803, pud=0000000077ffd803,0 Internal error: Oops: 96000007 [#1] SMP ... Hardware name: HiKey Development Board (DT) ... Call trace: regmap_mmio_read8+0x24/0x40 regmap_mmio_read+0x48/0x70 _regmap_bus_reg_read+0x38/0x48 _regmap_read+0x68/0x170 regmap_read+0x50/0x78 regmap_read_debugfs+0x1a0/0x308 regmap_map_read_file+0x48/0x58 full_proxy_read+0x68/0x98 __vfs_read+0x48/0x80 vfs_read+0x94/0x150 SyS_read+0x6c/0xd8 el0_svc_naked+0x30/0x34 Code: aa1e03e0 d503201f f9400280 8b334000 (39400000) Investigations have showed that, when triggered by debugfs read() handler, the mmio regmap logic was reading a bigger (16k) register area than the one mapped by devm_ioremap_resource() during hi655x-pmic probe time (4k). This commit changes hi655x's max register, according to HW specs, to be the same as the one declared in the pmic device in hi6220's dts, fixing the issue. Cc: #v4.9 #v4.14 #v4.16 #v4.17 Signed-off-by: Rafael David Tinoco Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/hi655x-pmic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/hi655x-pmic.c b/drivers/mfd/hi655x-pmic.c index c37ccbfd52f2..96c07fa1802a 100644 --- a/drivers/mfd/hi655x-pmic.c +++ b/drivers/mfd/hi655x-pmic.c @@ -49,7 +49,7 @@ static struct regmap_config hi655x_regmap_config = { .reg_bits = 32, .reg_stride = HI655X_STRIDE, .val_bits = 8, - .max_register = HI655X_BUS_ADDR(0xFFF), + .max_register = HI655X_BUS_ADDR(0x400) - HI655X_STRIDE, }; static struct resource pwrkey_resources[] = { -- GitLab From 8840ca570f2be567db83af3b62a214bc683a9991 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jul 2018 16:05:38 +0300 Subject: [PATCH 1252/1291] ovl: fix wrong use of impure dir cache in ovl_iterate() commit 67810693077afc1ebf9e1646af300436cb8103c2 upstream. Only upper dir can be impure, but if we are in the middle of iterating a lower real dir, dir could be copied up and marked impure. We only want the impure cache if we started iterating a real upper dir to begin with. Aditya Kali reported that the following reproducer hits the WARN_ON(!cache->refcount) in ovl_get_cache(): docker run --rm drupal:8.5.4-fpm-alpine \ sh -c 'cd /var/www/html/vendor/symfony && \ chown -R www-data:www-data . && ls -l .' Reported-by: Aditya Kali Tested-by: Aditya Kali Fixes: 4edb83bb1041 ('ovl: constant d_ino for non-merge dirs') Cc: # v4.14 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7fa7d68baa6d..1d4f9997236f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -623,6 +623,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); } +static bool ovl_is_impure_dir(struct file *file) +{ + struct ovl_dir_file *od = file->private_data; + struct inode *dir = d_inode(file->f_path.dentry); + + /* + * Only upper dir can be impure, but if we are in the middle of + * iterating a lower real dir, dir could be copied up and marked + * impure. We only want the impure cache if we started iterating + * a real upper dir to begin with. + */ + return od->is_upper && ovl_test_flag(OVL_IMPURE, dir); + +} + static int ovl_iterate_real(struct file *file, struct dir_context *ctx) { int err; @@ -646,7 +661,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) rdt.parent_ino = stat.ino; } - if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) { + if (ovl_is_impure_dir(file)) { rdt.cache = ovl_cache_get_impure(&file->f_path); if (IS_ERR(rdt.cache)) return PTR_ERR(rdt.cache); @@ -676,7 +691,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) * entries. */ if (ovl_same_sb(dentry->d_sb) && - (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || + (ovl_is_impure_dir(file) || OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) { return ovl_iterate_real(file, ctx); } -- GitLab From 256f63f52ec3fc0aa3a62d4c8f710ea39b7b7fdc Mon Sep 17 00:00:00 2001 From: Peter Kalauskas Date: Tue, 21 Aug 2018 21:54:02 -0700 Subject: [PATCH 1253/1291] drivers/block/zram/zram_drv.c: fix bug storing backing_dev commit c8bd134a4bddafe5917d163eea73873932c15e83 upstream. The call to strlcpy in backing_dev_store is incorrect. It should take the size of the destination buffer instead of the size of the source buffer. Additionally, ignore the newline character (\n) when reading the new file_name buffer. This makes it possible to set the backing_dev as follows: echo /dev/sdX > /sys/block/zram0/backing_dev The reason it worked before was the fact that strlcpy() copies 'len - 1' bytes, which is strlen(buf) - 1 in our case, so it accidentally didn't copy the trailing new line symbol. Which also means that "echo -n /dev/sdX" most likely was broken. Signed-off-by: Peter Kalauskas Link: http://lkml.kernel.org/r/20180813061623.GC64836@rodete-desktop-imager.corp.google.com Acked-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f149d3e61234..1e2648e4c286 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -321,6 +321,7 @@ static ssize_t backing_dev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { char *file_name; + size_t sz; struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; @@ -341,7 +342,11 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - strlcpy(file_name, buf, len); + strlcpy(file_name, buf, PATH_MAX); + /* ignore trailing newline */ + sz = strlen(file_name); + if (sz > 0 && file_name[sz - 1] == '\n') + file_name[sz - 1] = 0x00; backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); if (IS_ERR(backing_dev)) { -- GitLab From 924383edf44c2a4190820853c6e33c0deb24d98a Mon Sep 17 00:00:00 2001 From: Henry Willard Date: Tue, 14 Aug 2018 17:01:02 -0700 Subject: [PATCH 1254/1291] cpufreq: governor: Avoid accessing invalid governor_data commit 2a3eb51e30b9ac66fe1b75877627a7e4aaeca24a upstream. If cppc_cpufreq.ko is deleted at the same time that tuned-adm is changing profiles, there is a small chance that a race can occur between cpufreq_dbs_governor_exit() and cpufreq_dbs_governor_limits() resulting in a system failure when the latter tries to use policy->governor_data that has been freed by the former. This patch uses gov_dbs_data_mutex to synchronize access. Fixes: e788892ba3cc (cpufreq: governor: Get rid of governor events) Signed-off-by: Henry Willard [ rjw: Subject, minor white space adjustment ] Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_governor.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 43e14bb512c8..6a16d22bc604 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -555,12 +555,20 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { - struct policy_dbs_info *policy_dbs = policy->governor_data; + struct policy_dbs_info *policy_dbs; + + /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */ + mutex_lock(&gov_dbs_data_mutex); + policy_dbs = policy->governor_data; + if (!policy_dbs) + goto out; mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); - mutex_unlock(&policy_dbs->update_mutex); + +out: + mutex_unlock(&gov_dbs_data_mutex); } EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits); -- GitLab From 015156f5017987d4164630d763937834dd44506e Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 14 Aug 2018 10:34:42 +0800 Subject: [PATCH 1255/1291] PM / sleep: wakeup: Fix build error caused by missing SRCU support commit 3df6f61fff49632492490fb6e42646b803a9958a upstream. Commit ea0212f40c6 (power: auto select CONFIG_SRCU) made the code in drivers/base/power/wakeup.c use SRCU instead of RCU, but it forgot to select CONFIG_SRCU in Kconfig, which leads to the following build error if CONFIG_SRCU is not selected somewhere else: drivers/built-in.o: In function `wakeup_source_remove': (.text+0x3c6fc): undefined reference to `synchronize_srcu' drivers/built-in.o: In function `pm_print_active_wakeup_sources': (.text+0x3c7a8): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `pm_print_active_wakeup_sources': (.text+0x3c84c): undefined reference to `__srcu_read_unlock' drivers/built-in.o: In function `device_wakeup_arm_wake_irqs': (.text+0x3d1d8): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `device_wakeup_arm_wake_irqs': (.text+0x3d228): undefined reference to `__srcu_read_unlock' drivers/built-in.o: In function `device_wakeup_disarm_wake_irqs': (.text+0x3d24c): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `device_wakeup_disarm_wake_irqs': (.text+0x3d29c): undefined reference to `__srcu_read_unlock' drivers/built-in.o:(.data+0x4158): undefined reference to `process_srcu' Fix this error by selecting CONFIG_SRCU when PM_SLEEP is enabled. Fixes: ea0212f40c6 (power: auto select CONFIG_SRCU) Cc: 4.2+ # 4.2+ Signed-off-by: zhangyi (F) [ rjw: Minor subject/changelog fixups ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e8517b63eb37..dd2b5a4d89a5 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -105,6 +105,7 @@ config PM_SLEEP def_bool y depends on SUSPEND || HIBERNATE_CALLBACKS select PM + select SRCU config PM_SLEEP_SMP def_bool y -- GitLab From 58936d4d7b14bdeb26515b03dc9a3dbf255a709b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Aug 2018 16:43:39 +0200 Subject: [PATCH 1256/1291] KVM: VMX: fixes for vmentry_l1d_flush module parameter commit 0027ff2a75f9dcf0537ac0a65c5840b0e21a4950 upstream. Two bug fixes: 1) missing entries in the l1d_param array; this can cause a host crash if an access attempts to reach the missing entry. Future-proof the get function against any overflows as well. However, the two entries VMENTER_L1D_FLUSH_EPT_DISABLED and VMENTER_L1D_FLUSH_NOT_REQUIRED must not be accepted by the parse function, so disable them there. 2) invalid values must be rejected even if the CPU does not have the bug, so test for them before checking boot_cpu_has(X86_BUG_L1TF) ... and a small refactoring, since the .cmd field is redundant with the index in the array. Reported-by: Bandan Das Cc: stable@vger.kernel.org Fixes: a7b9020b06ec6d7c3f3b0d4ef1a9eba12654f4f7 Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8958b35f6008..a466ee14ad41 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -200,12 +200,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_ static const struct { const char *option; - enum vmx_l1d_flush_state cmd; + bool for_parse; } vmentry_l1d_param[] = { - {"auto", VMENTER_L1D_FLUSH_AUTO}, - {"never", VMENTER_L1D_FLUSH_NEVER}, - {"cond", VMENTER_L1D_FLUSH_COND}, - {"always", VMENTER_L1D_FLUSH_ALWAYS}, + [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, + [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, + [VMENTER_L1D_FLUSH_COND] = {"cond", true}, + [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, + [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, }; #define L1D_CACHE_ORDER 4 @@ -289,8 +291,9 @@ static int vmentry_l1d_flush_parse(const char *s) if (s) { for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (sysfs_streq(s, vmentry_l1d_param[i].option)) - return vmentry_l1d_param[i].cmd; + if (vmentry_l1d_param[i].for_parse && + sysfs_streq(s, vmentry_l1d_param[i].option)) + return i; } } return -EINVAL; @@ -300,13 +303,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) { int l1tf, ret; - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - l1tf = vmentry_l1d_flush_parse(s); if (l1tf < 0) return l1tf; + if (!boot_cpu_has(X86_BUG_L1TF)) + return 0; + /* * Has vmx_init() run already? If not then this is the pre init * parameter parsing. In that case just store the value and let @@ -326,6 +329,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) { + if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) + return sprintf(s, "???\n"); + return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } -- GitLab From 025cc91f8c5290dc1bf2586baf3767547760e899 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 23 Aug 2018 10:08:58 +1000 Subject: [PATCH 1257/1291] KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages commit 8cfbdbdc24815417a3ab35101ccf706b9a23ff17 upstream. Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page", 2018-07-17) added some checks to ensure that guest DMA mappings don't attempt to map more than the guest is entitled to access. However, errors in the logic mean that legitimate guest requests to map pages for DMA are being denied in some situations. Specifically, if the first page of the range passed to mm_iommu_get() is mapped with a normal page, and subsequent pages are mapped with transparent huge pages, we end up with mem->pageshift == 0. That means that the page size checks in mm_iommu_ua_to_hpa() and mm_iommu_up_to_hpa_rm() will always fail for every page in that region, and thus the guest can never map any memory in that region for DMA, typically leading to a flood of error messages like this: qemu-system-ppc64: VFIO_MAP_DMA: -22 qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument) The logic errors in mm_iommu_get() are: (a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte() call (meaning that find_linux_pte() returns the pte for the first address in the range, not the address we are currently up to); (b) use of 'pageshift' as the variable to receive the hugepage shift returned by find_linux_pte() - for a normal page this gets set to 0, leading to us setting mem->pageshift to 0 when we conclude that the pte returned by find_linux_pte() didn't match the page we were looking at; (c) comparing 'compshift', which is a page order, i.e. log base 2 of the number of pages, with 'pageshift', which is a log base 2 of the number of bytes. To fix these problems, this patch introduces 'cur_ua' to hold the current user address and uses that in the find_linux_pte() call; introduces 'pteshift' to hold the hugepage shift found by find_linux_pte(); and compares 'pteshift' with 'compshift + PAGE_SHIFT' rather than 'compshift'. The patch also moves the local_irq_restore to the point after the PTE pointer returned by find_linux_pte() has been dereferenced because otherwise the PTE could change underneath us, and adds a check to avoid doing the find_linux_pte() call once mem->pageshift has been reduced to PAGE_SHIFT, as an optimization. Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mmu_context_iommu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 816055927ee4..d735937d975c 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -130,6 +130,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, long i, j, ret = 0, locked_entries = 0; unsigned int pageshift; unsigned long flags; + unsigned long cur_ua; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -178,7 +179,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } for (i = 0; i < entries; ++i) { - if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + cur_ua = ua + (i << PAGE_SHIFT); + if (1 != get_user_pages_fast(cur_ua, 1/* pages */, 1/* iswrite */, &page)) { ret = -EFAULT; for (j = 0; j < i; ++j) @@ -197,7 +199,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, if (is_migrate_cma_page(page)) { if (mm_iommu_move_page_from_cma(page)) goto populate; - if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + if (1 != get_user_pages_fast(cur_ua, 1/* pages */, 1/* iswrite */, &page)) { ret = -EFAULT; @@ -211,20 +213,21 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } populate: pageshift = PAGE_SHIFT; - if (PageCompound(page)) { + if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) { pte_t *pte; struct page *head = compound_head(page); unsigned int compshift = compound_order(head); + unsigned int pteshift; local_irq_save(flags); /* disables as well */ - pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); - local_irq_restore(flags); + pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift); /* Double check it is still the same pinned page */ if (pte && pte_page(*pte) == head && - pageshift == compshift) - pageshift = max_t(unsigned int, pageshift, + pteshift == compshift + PAGE_SHIFT) + pageshift = max_t(unsigned int, pteshift, PAGE_SHIFT); + local_irq_restore(flags); } mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; -- GitLab From 0d78efe0412b1b60409d3c977717c4c2625560f4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 10 Aug 2018 20:43:48 -0700 Subject: [PATCH 1258/1291] xtensa: limit offsets in __loop_cache_{all,page} commit be75de25251f7cf3e399ca1f584716a95510d24a upstream. When building kernel for xtensa cores with big cache lines (e.g. 128 bytes or more) __loop_cache_all and __loop_cache_page may generate assembly instructions with immediate fields that are too big. This results in the following build errors: arch/xtensa/mm/misc.S: Assembler messages: arch/xtensa/mm/misc.S:464: Error: operand 2 of 'diwbi' has invalid value '256' arch/xtensa/mm/misc.S:464: Error: operand 2 of 'diwbi' has invalid value '384' arch/xtensa/kernel/head.S: Assembler messages: arch/xtensa/kernel/head.S:172: Error: operand 2 of 'diu' has invalid value '256' arch/xtensa/kernel/head.S:172: Error: operand 2 of 'diu' has invalid value '384' arch/xtensa/kernel/head.S:176: Error: operand 2 of 'iiu' has invalid value '256' arch/xtensa/kernel/head.S:176: Error: operand 2 of 'iiu' has invalid value '384' arch/xtensa/kernel/head.S:255: Error: operand 2 of 'diwb' has invalid value '256' arch/xtensa/kernel/head.S:255: Error: operand 2 of 'diwb' has invalid value '384' Add parameter max_immed to these macros and use it to limit values of immediate operands. Extract common code of these macros into the new macro __loop_cache_unroll. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/cacheasm.h | 65 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h index 2041abb10a23..2c73b4571226 100644 --- a/arch/xtensa/include/asm/cacheasm.h +++ b/arch/xtensa/include/asm/cacheasm.h @@ -31,16 +31,32 @@ * */ - .macro __loop_cache_all ar at insn size line_width - movi \ar, 0 + .macro __loop_cache_unroll ar at insn size line_width max_immed + + .if (1 << (\line_width)) > (\max_immed) + .set _reps, 1 + .elseif (2 << (\line_width)) > (\max_immed) + .set _reps, 2 + .else + .set _reps, 4 + .endif + + __loopi \ar, \at, \size, (_reps << (\line_width)) + .set _index, 0 + .rep _reps + \insn \ar, _index << (\line_width) + .set _index, _index + 1 + .endr + __endla \ar, \at, _reps << (\line_width) + + .endm + - __loopi \ar, \at, \size, (4 << (\line_width)) - \insn \ar, 0 << (\line_width) - \insn \ar, 1 << (\line_width) - \insn \ar, 2 << (\line_width) - \insn \ar, 3 << (\line_width) - __endla \ar, \at, 4 << (\line_width) + .macro __loop_cache_all ar at insn size line_width max_immed + + movi \ar, 0 + __loop_cache_unroll \ar, \at, \insn, \size, \line_width, \max_immed .endm @@ -57,14 +73,9 @@ .endm - .macro __loop_cache_page ar at insn line_width + .macro __loop_cache_page ar at insn line_width max_immed - __loopi \ar, \at, PAGE_SIZE, 4 << (\line_width) - \insn \ar, 0 << (\line_width) - \insn \ar, 1 << (\line_width) - \insn \ar, 2 << (\line_width) - \insn \ar, 3 << (\line_width) - __endla \ar, \at, 4 << (\line_width) + __loop_cache_unroll \ar, \at, \insn, PAGE_SIZE, \line_width, \max_immed .endm @@ -72,7 +83,8 @@ .macro ___unlock_dcache_all ar at #if XCHAL_DCACHE_LINE_LOCKABLE && XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -81,7 +93,8 @@ .macro ___unlock_icache_all ar at #if XCHAL_ICACHE_LINE_LOCKABLE && XCHAL_ICACHE_SIZE - __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE XCHAL_ICACHE_LINEWIDTH + __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE \ + XCHAL_ICACHE_LINEWIDTH 240 #endif .endm @@ -90,7 +103,8 @@ .macro ___flush_invalidate_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -99,7 +113,8 @@ .macro ___flush_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -109,7 +124,7 @@ #if XCHAL_DCACHE_SIZE __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \ - XCHAL_DCACHE_LINEWIDTH + XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -119,7 +134,7 @@ #if XCHAL_ICACHE_SIZE __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \ - XCHAL_ICACHE_LINEWIDTH + XCHAL_ICACHE_LINEWIDTH 1020 #endif .endm @@ -166,7 +181,7 @@ .macro ___flush_invalidate_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -175,7 +190,7 @@ .macro ___flush_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -184,7 +199,7 @@ .macro ___invalidate_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -193,7 +208,7 @@ .macro ___invalidate_icache_page ar as #if XCHAL_ICACHE_SIZE - __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH + __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH 1020 #endif .endm -- GitLab From fe806eb54bca42f854e2b74a08ebbba5a860f473 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 10 Aug 2018 22:21:22 -0700 Subject: [PATCH 1259/1291] xtensa: increase ranges in ___invalidate_{i,d}cache_all commit fec3259c9f747c039f90e99570540114c8d81a14 upstream. Cache invalidation macros use cache line size to iterate over invalidated cache lines, assuming that all cache ways are invalidated by single instruction, but xtensa ISA recommends to not assume that for future compatibility: In some implementations all ways at index Addry-1..z are invalidated regardless of the specified way, but for future compatibility this behavior should not be assumed. Iterate over all cache ways in ___invalidate_icache_all and ___invalidate_dcache_all. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/cacheasm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h index 2c73b4571226..34545ecfdd6b 100644 --- a/arch/xtensa/include/asm/cacheasm.h +++ b/arch/xtensa/include/asm/cacheasm.h @@ -123,7 +123,7 @@ .macro ___invalidate_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \ + __loop_cache_all \ar \at dii XCHAL_DCACHE_SIZE \ XCHAL_DCACHE_LINEWIDTH 1020 #endif @@ -133,7 +133,7 @@ .macro ___invalidate_icache_all ar at #if XCHAL_ICACHE_SIZE - __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \ + __loop_cache_all \ar \at iii XCHAL_ICACHE_SIZE \ XCHAL_ICACHE_LINEWIDTH 1020 #endif -- GitLab From ed480f2b9e86102b5d57fcd708915ae4f34add5b Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 15 Aug 2018 23:56:45 +0200 Subject: [PATCH 1260/1291] block, bfq: return nbytes and not zero from struct cftype .write() method commit fc8ebd01deeb12728c83381f6ec923e4a192ffd3 upstream. The value that struct cftype .write() method returns is then directly returned to userspace as the value returned by write() syscall, so it should be the number of bytes actually written (or consumed) and not zero. Returning zero from write() syscall makes programs like /bin/echo or bash spin. Signed-off-by: Maciej S. Szmigiero Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bfq-cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 5d53e504acae..4b571f3ea009 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -887,7 +887,8 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, if (ret) return ret; - return bfq_io_set_weight_legacy(of_css(of), NULL, weight); + ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight); + return ret ?: nbytes; } static int bfqg_print_stat(struct seq_file *sf, void *v) -- GitLab From ec13c53dc59f82cfaa71a295430c0005a8836f27 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:59:58 +0300 Subject: [PATCH 1261/1291] pnfs/blocklayout: off by one in bl_map_stripe() commit 0914bb965e38a055e9245637aed117efbe976e91 upstream. "dev->nr_children" is the number of children which were parsed successfully in bl_parse_stripe(). It could be all of them and then, in that case, it is equal to v->stripe.volumes_count. Either way, the > should be >= so that we don't go beyond the end of what we're supposed to. Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/blocklayout/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 95f74bd2c067..70c4165d2d74 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -204,7 +204,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, chunk = div_u64(offset, dev->chunk_size); div_u64_rem(chunk, dev->nr_children, &chunk_idx); - if (chunk_idx > dev->nr_children) { + if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", __func__, chunk_idx, offset, dev->chunk_size); /* error, should not happen */ -- GitLab From bf23ba3737e097d80363b5f5aea6239a952a36a9 Mon Sep 17 00:00:00 2001 From: Bill Baker Date: Tue, 19 Jun 2018 16:24:58 -0500 Subject: [PATCH 1262/1291] NFSv4 client live hangs after live data migration recovery commit 0f90be132cbf1537d87a6a8b9e80867adac892f6 upstream. After a live data migration event at the NFS server, the client may send I/O requests to the wrong server, causing a live hang due to repeated recovery events. On the wire, this will appear as an I/O request failing with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly. NFS4ERR_BADSSESSION is returned because the session ID being used was issued by the other server and is not valid at the old server. The failure is caused by async worker threads having cached the transport (xprt) in the rpc_task structure. After the migration recovery completes, the task is redispatched and the task resends the request to the wrong server based on the old value still present in tk_xprt. The solution is to recompute the tk_xprt field of the rpc_task structure so that the request goes to the correct server. Signed-off-by: Bill Baker Reviewed-by: Chuck Lever Tested-by: Helen Chao Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 9 ++++++++- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 28 ++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 51deff8e1f86..dda4a3a3ef6e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -547,8 +547,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, ret = -EIO; return ret; out_retry: - if (ret == 0) + if (ret == 0) { exception->retry = 1; + /* + * For NFS4ERR_MOVED, the client transport will need to + * be recomputed after migration recovery has completed. + */ + if (errorcode == -NFS4ERR_MOVED) + rpc_task_release_transport(task); + } return ret; } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 71c237e8240e..166fc4e76df6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -156,6 +156,7 @@ int rpc_switch_client_transport(struct rpc_clnt *, void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); +void rpc_task_release_transport(struct rpc_task *); void rpc_task_release_client(struct rpc_task *); int rpcb_create_local(struct net *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2ad827db2704..6d118357d9dc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -965,10 +965,20 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, } EXPORT_SYMBOL_GPL(rpc_bind_new_program); +void rpc_task_release_transport(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + if (xprt) { + task->tk_xprt = NULL; + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_task_release_transport); + void rpc_task_release_client(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = task->tk_xprt; if (clnt != NULL) { /* Remove from client task list */ @@ -979,12 +989,14 @@ void rpc_task_release_client(struct rpc_task *task) rpc_release_client(clnt); } + rpc_task_release_transport(task); +} - if (xprt != NULL) { - task->tk_xprt = NULL; - - xprt_put(xprt); - } +static +void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) +{ + if (!task->tk_xprt) + task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); } static @@ -992,8 +1004,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { if (clnt != NULL) { - if (task->tk_xprt == NULL) - task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); + rpc_task_set_transport(task, clnt); task->tk_client = clnt; atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) @@ -1529,6 +1540,7 @@ call_start(struct rpc_task *task) clnt->cl_program->version[clnt->cl_vers]->counts[idx]++; clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; + rpc_task_set_transport(task, clnt); } /* -- GitLab From c5759d5a7e6a00ffd8a342cde8f33340891bd095 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2018 17:25:37 -0400 Subject: [PATCH 1263/1291] NFSv4: Fix locking in pnfs_generic_recover_commit_reqs commit d0fbb1d8a194c0ec0180c1d073ad709e45503a43 upstream. The use of the inode->i_lock was converted to a mutex, but we forgot to remove the old inode unlock/lock() pair that allowed the layout segment to be put inside the loop. Reported-by: Jia-Ju Bai Fixes: e824f99adaaf1 ("NFSv4: Use a mutex to protect the per-inode commit...") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs_nfs.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 60da59be83b6..4a3dd66175fe 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. - * Note this must be called holding i_lock + * Note this must be called holding nfsi->commit_mutex */ void pnfs_generic_clear_request_commit(struct nfs_page *req, @@ -149,9 +149,7 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, if (list_empty(&b->written)) { freeme = b->wlseg; b->wlseg = NULL; - spin_unlock(&cinfo->inode->i_lock); pnfs_put_lseg(freeme); - spin_lock(&cinfo->inode->i_lock); goto restart; } } @@ -167,7 +165,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) LIST_HEAD(pages); int i; - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); for (i = idx; i < fl_cinfo->nbuckets; i++) { bucket = &fl_cinfo->buckets[i]; if (list_empty(&bucket->committing)) @@ -177,12 +175,12 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, &pages); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); nfs_retry_commit(&pages, freeme, cinfo, i); pnfs_put_lseg(freeme); - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); } - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } static unsigned int @@ -222,13 +220,13 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, struct list_head *pos; bucket = &cinfo->ds->buckets[data->ds_commit_index]; - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } -- GitLab From d453f04e813a9ded568b0bbb918909698c8bfed0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2018 17:55:56 -0400 Subject: [PATCH 1264/1291] NFSv4: Fix a sleep in atomic context in nfs4_callback_sequence() commit 8618289c46556fd4dd259a1af02ccc448032f48d upstream. We must drop the lock before we can sleep in referring_call_exists(). Reported-by: Jia-Ju Bai Fixes: 045d2a6d076a ("NFSv4.1: Delay callback processing...") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback_proc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 516b2248cafe..2c3f398995f6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -433,11 +433,14 @@ validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, * a match. If the slot is in use and the sequence numbers match, the * client is still waiting for a response to the original request. */ -static bool referring_call_exists(struct nfs_client *clp, +static int referring_call_exists(struct nfs_client *clp, uint32_t nrclists, - struct referring_call_list *rclists) + struct referring_call_list *rclists, + spinlock_t *lock) + __releases(lock) + __acquires(lock) { - bool status = 0; + int status = 0; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; @@ -460,8 +463,10 @@ static bool referring_call_exists(struct nfs_client *clp, for (j = 0; j < rclist->rcl_nrefcalls; j++) { ref = &rclist->rcl_refcalls[j]; + spin_unlock(lock); status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, ref->rc_sequenceid, HZ >> 1) < 0; + spin_lock(lock); if (status) goto out; } @@ -538,7 +543,8 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, * related callback was received before the response to the original * call. */ - if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { + if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, + &tbl->slot_tbl_lock) < 0) { status = htonl(NFS4ERR_DELAY); goto out_unlock; } -- GitLab From ddcb92700552eeedea717dcca9b16b0a99d26679 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 3 Jul 2018 09:59:47 +0100 Subject: [PATCH 1265/1291] ARM: tegra: Fix Tegra30 Cardhu PCA954x reset commit 6e1811900b6fe6f2b4665dba6bd6ed32c6b98575 upstream. On all versions of Tegra30 Cardhu, the reset signal to the NXP PCA9546 I2C mux is connected to the Tegra GPIO BB0. Currently, this pin on the Tegra is not configured as a GPIO but as a special-function IO (SFIO) that is multiplexing the pin to an I2S controller. On exiting system suspend, I2C commands sent to the PCA9546 are failing because there is no ACK. Although it is not possible to see exactly what is happening to the reset during suspend, by ensuring it is configured as a GPIO and driven high, to de-assert the reset, the failures are no longer seen. Please note that this GPIO is also used to drive the reset signal going to the camera connector on the board. However, given that there is no camera support currently for Cardhu, this should not have any impact. Fixes: 40431d16ff11 ("ARM: tegra: enable PCA9546 on Cardhu") Cc: stable@vger.kernel.org Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/tegra30-cardhu.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index 92a9740c533f..3b1db7b9ec50 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -206,6 +206,7 @@ i2cmux@70 { #address-cells = <1>; #size-cells = <0>; reg = <0x70>; + reset-gpio = <&gpio TEGRA_GPIO(BB, 0) GPIO_ACTIVE_LOW>; }; }; -- GitLab From 7cf82f3b7a7710dc29fc90ce83bee8a1ea7ff6fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Aug 2018 17:30:14 +0200 Subject: [PATCH 1266/1291] mm/tlb: Remove tlb_remove_table() non-concurrent condition commit a6f572084fbee8b30f91465f4a085d7a90901c57 upstream. Will noted that only checking mm_users is incorrect; we should also check mm_count in order to cover CPUs that have a lazy reference to this mm (and could do speculative TLB operations). If removing this turns out to be a performance issue, we can re-instate a more complete check, but in tlb_table_flush() eliding the call_rcu_sched(). Fixes: 267239116987 ("mm, powerpc: move the RCU page-table freeing into generic code") Reported-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rik van Riel Acked-by: Will Deacon Cc: Nicholas Piggin Cc: David Miller Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index c9657f013a4d..93d5d324904b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -392,15 +392,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; - /* - * When there's less then two users of this mm there cannot be a - * concurrent page-table walk. - */ - if (atomic_read(&tlb->mm->mm_users) < 2) { - __tlb_remove_table(table); - return; - } - if (*batch == NULL) { *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { -- GitLab From eb58c40465f35fa72a9b633040f367972ff2632c Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:56:59 -0700 Subject: [PATCH 1267/1291] iommu/vt-d: Add definitions for PFSID commit 0f725561e168485eff7277d683405c05b192f537 upstream. When SRIOV VF device IOTLB is invalidated, we need to provide the PF source ID such that IOMMU hardware can gauge the depth of invalidation queue which is shared among VFs. This is needed when device invalidation throttle (DIT) capability is supported. This patch adds bit definitions for checking and tracking PFSID. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 1 + include/linux/intel-iommu.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e8414bcf8390..c4bffe5cc838 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -422,6 +422,7 @@ struct device_domain_info { struct list_head global; /* link to global list */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ + u16 pfsid; /* SRIOV physical function source ID */ u8 pasid_supported:3; u8 pasid_enabled:1; u8 pri_supported:1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 485a5b48f038..c57d575947c4 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -112,6 +112,7 @@ * Extended Capability Register */ +#define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) @@ -281,6 +282,7 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -305,6 +307,7 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -- GitLab From c2ea292b13509ebb79a1a80c72d0b28d4e5a48db Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:57:00 -0700 Subject: [PATCH 1268/1291] iommu/vt-d: Fix dev iotlb pfsid use commit 1c48db44924298ad0cb5a6386b88017539be8822 upstream. PFSID should be used in the invalidation descriptor for flushing device IOTLBs on SRIOV VFs. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 6 +++--- drivers/iommu/intel-iommu.c | 17 ++++++++++++++++- include/linux/intel-iommu.h | 5 ++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e3dbb6101b4a..c0d1c4db5794 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1336,8 +1336,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } -void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask) +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask) { struct qi_desc desc; @@ -1352,7 +1352,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, qdep = 0; desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE; + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); qi_submit_sync(&desc, iommu); } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c4bffe5cc838..aaf3fed97477 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1503,6 +1503,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); + /* For IOMMU that supports device IOTLB throttling (DIT), we assign + * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge + * queue depth at PF level. If DIT is not set, PFSID will be treated as + * reserved, which should be set to 0. + */ + if (!ecap_dit(info->iommu->ecap)) + info->pfsid = 0; + else { + struct pci_dev *pf_pdev; + + /* pdev will be returned if device is not a vf */ + pf_pdev = pci_physfn(pdev); + info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn); + } #ifdef CONFIG_INTEL_IOMMU_SVM /* The PCIe spec, in its wisdom, declares that the behaviour of @@ -1568,7 +1582,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c57d575947c4..a6ab2f51f703 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -453,9 +453,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask); - +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -- GitLab From b692c405a1ae577b44764351ad80e99e0a08be99 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 18:34:10 +0200 Subject: [PATCH 1269/1291] sys: don't hold uts_sem while accessing userspace memory commit 42a0cc3478584d4d63f68f2f5af021ddbea771fa upstream. Holding uts_sem as a writer while accessing userspace memory allows a namespace admin to stall all processes that attempt to take uts_sem. Instead, move data through stack buffers and don't access userspace memory while uts_sem is held. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 51 ++++++++--------- arch/sparc/kernel/sys_sparc_32.c | 22 +++++--- arch/sparc/kernel/sys_sparc_64.c | 20 ++++--- kernel/sys.c | 95 +++++++++++++++----------------- kernel/utsname_sysctl.c | 41 ++++++++------ 5 files changed, 119 insertions(+), 110 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index a48976dc9bcd..918c3938ef66 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -530,24 +530,19 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, SYSCALL_DEFINE1(osf_utsname, char __user *, name) { int error; + char tmp[5 * 32]; down_read(&uts_sem); - error = -EFAULT; - if (copy_to_user(name + 0, utsname()->sysname, 32)) - goto out; - if (copy_to_user(name + 32, utsname()->nodename, 32)) - goto out; - if (copy_to_user(name + 64, utsname()->release, 32)) - goto out; - if (copy_to_user(name + 96, utsname()->version, 32)) - goto out; - if (copy_to_user(name + 128, utsname()->machine, 32)) - goto out; + memcpy(tmp + 0 * 32, utsname()->sysname, 32); + memcpy(tmp + 1 * 32, utsname()->nodename, 32); + memcpy(tmp + 2 * 32, utsname()->release, 32); + memcpy(tmp + 3 * 32, utsname()->version, 32); + memcpy(tmp + 4 * 32, utsname()->machine, 32); + up_read(&uts_sem); - error = 0; - out: - up_read(&uts_sem); - return error; + if (copy_to_user(name, tmp, sizeof(tmp))) + return -EFAULT; + return 0; } SYSCALL_DEFINE0(getpagesize) @@ -567,18 +562,21 @@ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) { int len, err = 0; char *kname; + char tmp[32]; - if (namelen > 32) + if (namelen < 0 || namelen > 32) namelen = 32; down_read(&uts_sem); kname = utsname()->domainname; len = strnlen(kname, namelen); - if (copy_to_user(name, kname, min(len + 1, namelen))) - err = -EFAULT; + len = min(len + 1, namelen); + memcpy(tmp, kname, len); up_read(&uts_sem); - return err; + if (copy_to_user(name, tmp, len)) + return -EFAULT; + return 0; } /* @@ -739,13 +737,14 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) }; unsigned long offset; const char *res; - long len, err = -EINVAL; + long len; + char tmp[__NEW_UTS_LEN + 1]; offset = command-1; if (offset >= ARRAY_SIZE(sysinfo_table)) { /* Digital UNIX has a few unpublished interfaces here */ printk("sysinfo(%d)", command); - goto out; + return -EINVAL; } down_read(&uts_sem); @@ -753,13 +752,11 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) len = strlen(res)+1; if ((unsigned long)len > (unsigned long)count) len = count; - if (copy_to_user(buf, res, len)) - err = -EFAULT; - else - err = 0; + memcpy(tmp, res, len); up_read(&uts_sem); - out: - return err; + if (copy_to_user(buf, tmp, len)) + return -EFAULT; + return 0; } SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 990703b7cf4d..4b7719b2a73c 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -204,23 +204,27 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, asmlinkage long sys_getdomainname(char __user *name, int len) { - int nlen, err; - + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; + if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + up_read(&uts_sem); -out: + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; + +out_unlock: up_read(&uts_sem); return err; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 55416db482ad..d79c1c74873c 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -527,23 +527,27 @@ extern void check_pending(int signum); SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) { - int nlen, err; + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); + + up_read(&uts_sem); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; -out: +out_unlock: up_read(&uts_sem); return err; } diff --git a/kernel/sys.c b/kernel/sys.c index de4ed027dfd7..e25ec93aea22 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1176,18 +1176,19 @@ static int override_release(char __user *release, size_t len) SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { - int errno = 0; + struct new_utsname tmp; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof *name)) - errno = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!errno && override_release(name->release, sizeof(name->release))) - errno = -EFAULT; - if (!errno && override_architecture(name)) - errno = -EFAULT; - return errno; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } #ifdef __ARCH_WANT_SYS_OLD_UNAME @@ -1196,55 +1197,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) */ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) { - int error = 0; + struct old_utsname tmp; if (!name) return -EFAULT; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - error = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - if (!error && override_architecture(name)) - error = -EFAULT; - return error; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - int error; + struct oldold_utsname tmp = {}; if (!name) return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, name->machine + __OLD_UTS_LEN); + memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); + memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); + memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN); + memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN); + memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_architecture(name)) - error = -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - return error ? -EFAULT : 0; + if (override_architecture(name)) + return -EFAULT; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + return 0; } #endif @@ -1258,17 +1250,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->nodename, tmp, len); memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; uts_proc_notify(UTS_PROC_HOSTNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } @@ -1276,8 +1269,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { - int i, errno; + int i; struct new_utsname *u; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; @@ -1286,11 +1280,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) i = 1 + strlen(u->nodename); if (i > len) i = len; - errno = 0; - if (copy_to_user(name, u->nodename, i)) - errno = -EFAULT; + memcpy(tmp, u->nodename, i); up_read(&uts_sem); - return errno; + if (copy_to_user(name, tmp, i)) + return -EFAULT; + return 0; } #endif @@ -1309,17 +1303,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->domainname, tmp, len); memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; uts_proc_notify(UTS_PROC_DOMAINNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 233cd8fc6910..258033d62cb3 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -18,7 +18,7 @@ #ifdef CONFIG_PROC_SYSCTL -static void *get_uts(struct ctl_table *table, int write) +static void *get_uts(struct ctl_table *table) { char *which = table->data; struct uts_namespace *uts_ns; @@ -26,21 +26,9 @@ static void *get_uts(struct ctl_table *table, int write) uts_ns = current->nsproxy->uts_ns; which = (which - (char *)&init_uts_ns) + (char *)uts_ns; - if (!write) - down_read(&uts_sem); - else - down_write(&uts_sem); return which; } -static void put_uts(struct ctl_table *table, int write, void *which) -{ - if (!write) - up_read(&uts_sem); - else - up_write(&uts_sem); -} - /* * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? @@ -50,13 +38,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write, { struct ctl_table uts_table; int r; + char tmp_data[__NEW_UTS_LEN + 1]; + memcpy(&uts_table, table, sizeof(uts_table)); - uts_table.data = get_uts(table, write); + uts_table.data = tmp_data; + + /* + * Buffer the value in tmp_data so that proc_dostring() can be called + * without holding any locks. + * We also need to read the original value in the write==1 case to + * support partial writes. + */ + down_read(&uts_sem); + memcpy(tmp_data, get_uts(table), sizeof(tmp_data)); + up_read(&uts_sem); r = proc_dostring(&uts_table, write, buffer, lenp, ppos); - put_uts(table, write, uts_table.data); - if (write) + if (write) { + /* + * Write back the new value. + * Note that, since we dropped uts_sem, the result can + * theoretically be incorrect if there are two parallel writes + * at non-zero offsets to the same sysctl. + */ + down_write(&uts_sem); + memcpy(get_uts(table), tmp_data, sizeof(tmp_data)); + up_write(&uts_sem); proc_sys_poll_notify(table->poll); + } return r; } -- GitLab From 656d6e6f6d235fe3e349a331ec2d8c5105df721e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 18:34:19 +0200 Subject: [PATCH 1270/1291] userns: move user access out of the mutex commit 5820f140edef111a9ea2ef414ab2428b8cb805b1 upstream. The old code would hold the userns_state_mutex indefinitely if memdup_user_nul stalled due to e.g. a userfault region. Prevent that by moving the memdup_user_nul in front of the mutex_lock(). Note: This changes the error precedence of invalid buf/count/*ppos vs map already written / capabilities missing. Fixes: 22d917d80e84 ("userns: Rework the user_namespace adding uid/gid...") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Christian Brauner Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index c490f1e4313b..ed80a88980f0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -650,7 +650,16 @@ static ssize_t map_write(struct file *file, const char __user *buf, unsigned idx; struct uid_gid_extent *extent = NULL; char *kbuf = NULL, *pos, *next_line; - ssize_t ret = -EINVAL; + ssize_t ret; + + /* Only allow < page size writes at the beginning of the file */ + if ((*ppos != 0) || (count >= PAGE_SIZE)) + return -EINVAL; + + /* Slurp in the user data */ + kbuf = memdup_user_nul(buf, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); /* * The userns_state_mutex serializes all writes to any given map. @@ -684,19 +693,6 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; - /* Only allow < page size writes at the beginning of the file */ - ret = -EINVAL; - if ((*ppos != 0) || (count >= PAGE_SIZE)) - goto out; - - /* Slurp in the user data */ - kbuf = memdup_user_nul(buf, count); - if (IS_ERR(kbuf)) { - ret = PTR_ERR(kbuf); - kbuf = NULL; - goto out; - } - /* Parse the user data */ ret = -EINVAL; pos = kbuf; -- GitLab From a230db38a9fd9b422bb7f114da9bb7141c87fded Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 20:49:45 +0200 Subject: [PATCH 1271/1291] ubifs: Fix memory leak in lprobs self-check commit eef19816ada3abd56d9f20c88794cc2fea83ebb2 upstream. Allocate the buffer after we return early. Otherwise memory is being leaked. Cc: Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/lprops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 6c3a1abd0e22..780a436d8c45 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1091,10 +1091,6 @@ static int scan_check_cb(struct ubifs_info *c, } } - buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); - if (!buf) - return -ENOMEM; - /* * After an unclean unmount, empty and freeable LEBs * may contain garbage - do not scan them. @@ -1113,6 +1109,10 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; } + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) + return -ENOMEM; + sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { ret = PTR_ERR(sleb); -- GitLab From 3259dd7176e4d98248f2dcc94f540dbca051ff42 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 1 Jul 2018 23:20:50 +0200 Subject: [PATCH 1272/1291] Revert "UBIFS: Fix potential integer overflow in allocation" commit 08acbdd6fd736b90f8d725da5a0de4de2dd6de62 upstream. This reverts commit 353748a359f1821ee934afc579cf04572406b420. It bypassed the linux-mtd review process and fixes the issue not as it should. Cc: Kees Cook Cc: Silvio Cesare Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 8ae1cd8611cc..04c4ec6483e5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1283,11 +1283,10 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in int *new_len) { void *buf; - int err, compr_type; - u32 dlen, out_len, old_dlen; + int err, dlen, compr_type, out_len, old_dlen; out_len = le32_to_cpu(dn->size); - buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS); + buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); if (!buf) return -ENOMEM; -- GitLab From f6d7acc1d9cae277b0379e8209eaefe59a3fbf79 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 1 Jul 2018 23:20:51 +0200 Subject: [PATCH 1273/1291] ubifs: Check data node size before truncate commit 95a22d2084d72ea067d8323cc85677dba5d97cae upstream. Check whether the size is within bounds before using it. If the size is not correct, abort and dump the bad data node. Cc: Kees Cook Cc: Silvio Cesare Cc: stable@vger.kernel.org Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system") Reported-by: Silvio Cesare Signed-off-by: Richard Weinberger Reviewed-by: Kees Cook Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 04c4ec6483e5..b2419c855b47 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1388,7 +1388,16 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, else if (err) goto out_free; else { - if (le32_to_cpu(dn->size) <= dlen) + int dn_len = le32_to_cpu(dn->size); + + if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { + ubifs_err(c, "bad data node (block %u, inode %lu)", + blk, inode->i_ino); + ubifs_dump_node(c, dn); + goto out_free; + } + + if (dn_len <= dlen) dlen = 0; /* Nothing to do */ else { err = truncate_data_node(c, inode, blk, dn, &dlen); -- GitLab From 8a23348d76a1e7716da6e76383281ac82fc071cf Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 8 Jul 2018 23:33:25 +0200 Subject: [PATCH 1274/1291] ubifs: xattr: Don't operate on deleted inodes commit 11a6fc3dc743e22fb50f2196ec55bee5140d3c52 upstream. xattr operations can race with unlink and the following assert triggers: UBIFS assert failed in ubifs_jnl_change_xattr at 1606 (pid 6256) Fix this by checking i_nlink before working on the host inode. Cc: Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/xattr.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index c13eae819cbc..d47f16c0d582 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,6 +152,12 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); + + if (!host->i_nlink) { + err = -ENOENT; + goto out_noent; + } + host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); @@ -183,6 +189,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_names -= fname_len(nm); host_ui->flags &= ~UBIFS_CRYPT_FL; +out_noent: mutex_unlock(&host_ui->ui_mutex); out_free: make_bad_inode(inode); @@ -234,6 +241,12 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); + + if (!host->i_nlink) { + err = -ENOENT; + goto out_noent; + } + host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -255,6 +268,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_size += CALC_XATTR_BYTES(old_size); +out_noent: mutex_unlock(&host_ui->ui_mutex); make_bad_inode(inode); out_free: @@ -483,6 +497,12 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); + + if (!host->i_nlink) { + err = -ENOENT; + goto out_noent; + } + host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); @@ -502,6 +522,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); host_ui->xattr_names += fname_len(nm); +out_noent: mutex_unlock(&host_ui->ui_mutex); ubifs_release_budget(c, &req); make_bad_inode(inode); @@ -541,6 +562,9 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) ubifs_assert(inode_is_locked(host)); + if (!host->i_nlink) + return -ENOENT; + if (fname_len(&nm) > UBIFS_MAX_NLEN) return -ENAMETOOLONG; -- GitLab From 63bbaa1469e6a037a1f7b84106bf8f0dcb3f6f71 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 00:52:28 +0200 Subject: [PATCH 1275/1291] ubifs: Fix synced_i_size calculation for xattr inodes commit 59965593205fa4044850d35ee3557cf0b7edcd14 upstream. In ubifs_jnl_update() we sync parent and child inodes to the flash, in case of xattrs, the parent inode (AKA host inode) has a non-zero data_len. Therefore we need to adjust synced_i_size too. This issue was reported by ubifs self tests unter a xattr related work load. UBIFS error (ubi0:0 pid 1896): dbg_check_synced_i_size: ui_size is 4, synced_i_size is 0, but inode is clean UBIFS error (ubi0:0 pid 1896): dbg_check_synced_i_size: i_ino 65, i_mode 0x81a4, i_size 4 Cc: Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index b2419c855b47..69051f7a9606 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -665,6 +665,11 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; spin_unlock(&ui->ui_lock); + if (xent) { + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + } mark_inode_clean(c, ui); mark_inode_clean(c, host_ui); return 0; -- GitLab From 0ef9c771924d485e8c3768fdd35acc203525f352 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 11 Jun 2018 11:39:55 +0530 Subject: [PATCH 1276/1291] pwm: tiehrpwm: Don't use emulation mode bits to control PWM output commit aa49d628f6e016bcec8c6f8e704b9b18ee697329 upstream. As per AM335x TRM SPRUH73P "15.2.2.11 ePWM Behavior During Emulation", TBCTL[15:14] only have effect during emulation suspend events (IOW, to stop PWM when debugging using a debugger). These bits have no effect on PWM output during normal running of system. Hence, remove code accessing these bits as they have no role in enabling/disabling PWMs. Fixes: 19891b20e7c2 ("pwm: pwm-tiehrpwm: PWM driver support for EHRPWM") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-tiehrpwm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 4c22cb395040..768176f54d5e 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -33,10 +33,6 @@ #define TBCTL 0x00 #define TBPRD 0x0A -#define TBCTL_RUN_MASK (BIT(15) | BIT(14)) -#define TBCTL_STOP_NEXT 0 -#define TBCTL_STOP_ON_CYCLE BIT(14) -#define TBCTL_FREE_RUN (BIT(15) | BIT(14)) #define TBCTL_PRDLD_MASK BIT(3) #define TBCTL_PRDLD_SHDW 0 #define TBCTL_PRDLD_IMDT BIT(3) @@ -360,7 +356,7 @@ static int ehrpwm_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) /* Channels polarity can be configured from action qualifier module */ configure_polarity(pc, pwm->hwpwm); - /* Enable TBCLK before enabling PWM device */ + /* Enable TBCLK */ ret = clk_enable(pc->tbclk); if (ret) { dev_err(chip->dev, "Failed to enable TBCLK for %s: %d\n", @@ -368,9 +364,6 @@ static int ehrpwm_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) return ret; } - /* Enable time counter for free_run */ - ehrpwm_modify(pc->mmio_base, TBCTL, TBCTL_RUN_MASK, TBCTL_FREE_RUN); - return 0; } @@ -400,9 +393,6 @@ static void ehrpwm_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) /* Disabling TBCLK on PWM disable */ clk_disable(pc->tbclk); - /* Stop Time base counter */ - ehrpwm_modify(pc->mmio_base, TBCTL, TBCTL_RUN_MASK, TBCTL_STOP_NEXT); - /* Disable clock on PWM disable */ pm_runtime_put_sync(chip->dev); } -- GitLab From 9b0dd656d5932580d3727f8afa20e81f0e6d8cfe Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 11 Jun 2018 11:39:56 +0530 Subject: [PATCH 1277/1291] pwm: tiehrpwm: Fix disabling of output of PWMs commit 38dabd91ff0bde33352ca3cc65ef515599b77a05 upstream. pwm-tiehrpwm driver disables PWM output by putting it in low output state via active AQCSFRC register in ehrpwm_pwm_disable(). But, the AQCSFRC shadow register is not updated. Therefore, when shadow AQCSFRC register is re-enabled in ehrpwm_pwm_enable() (say to enable second PWM output), previous settings are lost as shadow register value is loaded into active register. This results in things like PWMA getting enabled automatically, when PWMB is enabled and vice versa. Fix this by updating AQCSFRC shadow register as well during ehrpwm_pwm_disable(). Fixes: 19891b20e7c2 ("pwm: pwm-tiehrpwm: PWM driver support for EHRPWM") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-tiehrpwm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 768176f54d5e..f7b8a86fa5c5 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -381,6 +381,8 @@ static void ehrpwm_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) aqcsfrc_mask = AQCSFRC_CSFA_MASK; } + /* Update shadow register first before modifying active register */ + ehrpwm_modify(pc->mmio_base, AQCSFRC, aqcsfrc_mask, aqcsfrc_val); /* * Changes to immediate action on Action Qualifier. This puts * Action Qualifier control on PWM output from next TBCLK -- GitLab From d0f2eb3a419be3ac0304fa83c6b0086ef7fc095a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 25 Jul 2018 15:41:54 +0200 Subject: [PATCH 1278/1291] fb: fix lost console when the user unplugs a USB adapter commit 8c5b044299951acd91e830a688dd920477ea1eda upstream. I have a USB display adapter using the udlfb driver and I use it on an ARM board that doesn't have any graphics card. When I plug the adapter in, the console is properly displayed, however when I unplug and re-plug the adapter, the console is not displayed and I can't access it until I reboot the board. The reason is this: When the adapter is unplugged, dlfb_usb_disconnect calls unlink_framebuffer, then it waits until the reference count drops to zero and then it deallocates the framebuffer. However, the console that is attached to the framebuffer device keeps the reference count non-zero, so the framebuffer device is never destroyed. When the USB adapter is plugged again, it creates a new device /dev/fb1 and the console is not attached to it. This patch fixes the bug by unbinding the console from unlink_framebuffer. The code to unbind the console is moved from do_unregister_framebuffer to a function unbind_console. When the console is unbound, the reference count drops to zero and the udlfb driver frees the framebuffer. When the adapter is plugged back, a new framebuffer is created and the console is attached to it. Signed-off-by: Mikulas Patocka Cc: Dave Airlie Cc: Bernie Thompson Cc: Ladislav Michl Cc: stable@vger.kernel.org [b.zolnierkie: preserve old behavior for do_unregister_framebuffer()] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 38 +++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index f741ba8df01b..11d73b5fc885 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1716,12 +1716,12 @@ static int do_register_framebuffer(struct fb_info *fb_info) return 0; } -static int do_unregister_framebuffer(struct fb_info *fb_info) +static int unbind_console(struct fb_info *fb_info) { struct fb_event event; - int i, ret = 0; + int ret; + int i = fb_info->node; - i = fb_info->node; if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) return -EINVAL; @@ -1736,17 +1736,29 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) unlock_fb_info(fb_info); console_unlock(); + return ret; +} + +static int __unlink_framebuffer(struct fb_info *fb_info); + +static int do_unregister_framebuffer(struct fb_info *fb_info) +{ + struct fb_event event; + int ret; + + ret = unbind_console(fb_info); + if (ret) return -EINVAL; pm_vt_switch_unregister(fb_info->dev); - unlink_framebuffer(fb_info); + __unlink_framebuffer(fb_info); if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); fb_destroy_modelist(&fb_info->modelist); - registered_fb[i] = NULL; + registered_fb[fb_info->node] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); event.info = fb_info; @@ -1759,7 +1771,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) return 0; } -int unlink_framebuffer(struct fb_info *fb_info) +static int __unlink_framebuffer(struct fb_info *fb_info) { int i; @@ -1771,6 +1783,20 @@ int unlink_framebuffer(struct fb_info *fb_info) device_destroy(fb_class, MKDEV(FB_MAJOR, i)); fb_info->dev = NULL; } + + return 0; +} + +int unlink_framebuffer(struct fb_info *fb_info) +{ + int ret; + + ret = __unlink_framebuffer(fb_info); + if (ret) + return ret; + + unbind_console(fb_info); + return 0; } EXPORT_SYMBOL(unlink_framebuffer); -- GitLab From 19b99719970be85d17c55a0f688b746a11e82eb0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 25 Jul 2018 15:41:55 +0200 Subject: [PATCH 1279/1291] udlfb: set optimal write delay commit bb24153a3f13dd0dbc1f8055ad97fe346d598f66 upstream. The default delay 5 jiffies is too much when the kernel is compiled with HZ=100 - it results in jumpy cursor in Xwindow. In order to find out the optimal delay, I benchmarked the driver on 1280x720x30fps video. I found out that with HZ=1000, 10ms is acceptable, but with HZ=250 or HZ=300, we need 4ms, so that the video is played without any frame skips. This patch changes the delay to this value. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- include/video/udlfb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/udlfb.h b/include/video/udlfb.h index 1252a7a89bc0..85e32ee739fc 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -88,7 +88,7 @@ struct dlfb_data { #define MIN_RAW_PIX_BYTES 2 #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES) -#define DL_DEFIO_WRITE_DELAY 5 /* fb_deferred_io.delay in jiffies */ +#define DL_DEFIO_WRITE_DELAY msecs_to_jiffies(HZ <= 300 ? 4 : 10) /* optimal value for 720p video */ #define DL_DEFIO_WRITE_DISABLE (HZ*60) /* "disable" with long delay */ /* remove these once align.h patch is taken into kernel */ -- GitLab From ff0791f467d0fe641d7feadaf823a1fd16cdf7c9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 Jun 2018 13:43:48 +0200 Subject: [PATCH 1280/1291] getxattr: use correct xattr length commit 82c9a927bc5df6e06b72d206d24a9d10cced4eb5 upstream. When running in a container with a user namespace, if you call getxattr with name = "system.posix_acl_access" and size % 8 != 4, then getxattr silently skips the user namespace fixup that it normally does resulting in un-fixed-up data being returned. This is caused by posix_acl_fix_xattr_to_user() being passed the total buffer size and not the actual size of the xattr as returned by vfs_getxattr(). This commit passes the actual length of the xattr as returned by vfs_getxattr() down. A reproducer for the issue is: touch acl_posix setfacl -m user:0:rwx acl_posix and the compile: #define _GNU_SOURCE #include #include #include #include #include #include #include /* Run in user namespace with nsuid 0 mapped to uid != 0 on the host. */ int main(int argc, void **argv) { ssize_t ret1, ret2; char buf1[128], buf2[132]; int fret = EXIT_SUCCESS; char *file; if (argc < 2) { fprintf(stderr, "Please specify a file with " "\"system.posix_acl_access\" permissions set\n"); _exit(EXIT_FAILURE); } file = argv[1]; ret1 = getxattr(file, "system.posix_acl_access", buf1, sizeof(buf1)); if (ret1 < 0) { fprintf(stderr, "%s - Failed to retrieve " "\"system.posix_acl_access\" " "from \"%s\"\n", strerror(errno), file); _exit(EXIT_FAILURE); } ret2 = getxattr(file, "system.posix_acl_access", buf2, sizeof(buf2)); if (ret2 < 0) { fprintf(stderr, "%s - Failed to retrieve " "\"system.posix_acl_access\" " "from \"%s\"\n", strerror(errno), file); _exit(EXIT_FAILURE); } if (ret1 != ret2) { fprintf(stderr, "The value of \"system.posix_acl_" "access\" for file \"%s\" changed " "between two successive calls\n", file); _exit(EXIT_FAILURE); } for (ssize_t i = 0; i < ret2; i++) { if (buf1[i] == buf2[i]) continue; fprintf(stderr, "Unexpected different in byte %zd: " "%02x != %02x\n", i, buf1[i], buf2[i]); fret = EXIT_FAILURE; } if (fret == EXIT_SUCCESS) fprintf(stderr, "Test passed\n"); else fprintf(stderr, "Test failed\n"); _exit(fret); } and run: ./tester acl_posix On a non-fixed up kernel this should return something like: root@c1:/# ./t Unexpected different in byte 16: ffffffa0 != 00 Unexpected different in byte 17: ffffff86 != 00 Unexpected different in byte 18: 01 != 00 and on a fixed kernel: root@c1:~# ./t Test passed Cc: stable@vger.kernel.org Fixes: 2f6f0654ab61 ("userns: Convert vfs posix_acl support to use kuids and kgids") Link: https://bugzilla.kernel.org/show_bug.cgi?id=199945 Reported-by: Colin Watson Signed-off-by: Christian Brauner Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xattr.c b/fs/xattr.c index 61cd28ba25f3..be2ce57cd6ad 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -541,7 +541,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (error > 0) { if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) - posix_acl_fix_xattr_to_user(kvalue, size); + posix_acl_fix_xattr_to_user(kvalue, error); if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { -- GitLab From c8d875b471b6f0304d5d85d0c33c58374b1d78d7 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 10 Aug 2018 13:23:15 -0600 Subject: [PATCH 1281/1291] libnvdimm: fix ars_status output length calculation commit 286e87718103acdf85f4ed323a37e4839a8a7c05 upstream. Commit efda1b5d87cb ("acpi, nfit, libnvdimm: fix / harden ars_status output length handling") Introduced additional hardening for ambiguity in the ACPI spec for ars_status output sizing. However, it had a couple of cases mixed up. Where it should have been checking for (and returning) "out_field[1] - 4" it was using "out_field[1] - 8" and vice versa. This caused a four byte discrepancy in the buffer size passed on to the command handler, and in some cases, this caused memory corruption like: ./daxdev-errors.sh: line 76: 24104 Aborted (core dumped) ./daxdev-errors $busdev $region malloc(): memory corruption Program received signal SIGABRT, Aborted. [...] #5 0x00007ffff7865a2e in calloc () from /lib64/libc.so.6 #6 0x00007ffff7bc2970 in ndctl_bus_cmd_new_ars_status (ars_cap=ars_cap@entry=0x6153b0) at ars.c:136 #7 0x0000000000401644 in check_ars_status (check=0x7fffffffdeb0, bus=0x604c20) at daxdev-errors.c:144 #8 test_daxdev_clear_error (region_name=, bus_name=) at daxdev-errors.c:332 Cc: Cc: Dave Jiang Cc: Keith Busch Cc: Lukasz Dorau Cc: Dan Williams Fixes: efda1b5d87cb ("acpi, nfit, libnvdimm: fix / harden ars_status output length handling") Signed-off-by: Vishal Verma Reviewed-by: Keith Busch Signed-of-by: Dave Jiang Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2fffd42767c7..fb5ab5812a22 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -808,9 +808,9 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, * overshoots the remainder by 4 bytes, assume it was * including 'status'. */ - if (out_field[1] - 8 == remainder) + if (out_field[1] - 4 == remainder) return remainder; - return out_field[1] - 4; + return out_field[1] - 8; } else if (cmd == ND_CMD_CALL) { struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field; -- GitLab From d1a265da7b2983e3201988f1a8202e749e2d4352 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Thu, 23 Aug 2018 02:02:56 +0800 Subject: [PATCH 1282/1291] bcache: release dc->writeback_lock properly in bch_writeback_thread() commit 3943b040f11ed0cc6d4585fd286a623ca8634547 upstream. The writeback thread would exit with a lock held when the cache device is detached via sysfs interface, fix it by releasing the held lock before exiting the while-loop. Fixes: fadd94e05c02 (bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set) Signed-off-by: Shan Hai Signed-off-by: Coly Li Tested-by: Shenghui Wang Cc: stable@vger.kernel.org #4.17+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 930b00f6a3a2..5adb0c850b6c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -456,8 +456,10 @@ static int bch_writeback_thread(void *arg) * data on cache. BCACHE_DEV_DETACHING flag is set in * bch_cached_dev_detach(). */ - if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) + if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) { + up_write(&dc->writeback_lock); break; + } } up_write(&dc->writeback_lock); -- GitLab From 5a842ecca279f583c3fdc9a1ed8fe7a4cc348789 Mon Sep 17 00:00:00 2001 From: "Eddie.Horng" Date: Fri, 20 Jul 2018 15:30:00 +0800 Subject: [PATCH 1283/1291] cap_inode_getsecurity: use d_find_any_alias() instead of d_find_alias() commit 355139a8dba446cc11a424cddbf7afebc3041ba1 upstream. The code in cap_inode_getsecurity(), introduced by commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities"), should use d_find_any_alias() instead of d_find_alias() do handle unhashed dentry correctly. This is needed, for example, if execveat() is called with an open but unlinked overlayfs file, because overlayfs unhashes dentry on unlink. This is a regression of real life application, first reported at https://www.spinics.net/lists/linux-unionfs/msg05363.html Below reproducer and setup can reproduce the case. const char* exec="echo"; const char *newargv[] = { "echo", "hello", NULL}; const char *newenviron[] = { NULL }; int fd, err; fd = open(exec, O_PATH); unlink(exec); err = syscall(322/*SYS_execveat*/, fd, "", newargv, newenviron, AT_EMPTY_PATH); if(err<0) fprintf(stderr, "execveat: %s\n", strerror(errno)); gcc compile into ~/test/a.out mount -t overlay -orw,lowerdir=/mnt/l,upperdir=/mnt/u,workdir=/mnt/w none /mnt/m cd /mnt/m cp /bin/echo . ~/test/a.out Expected result: hello Actually result: execveat: Invalid argument dmesg: Invalid argument reading file caps for /dev/fd/3 The 2nd reproducer and setup emulates similar case but for regular filesystem: const char* exec="echo"; int fd, err; char buf[256]; fd = open(exec, O_RDONLY); unlink(exec); err = fgetxattr(fd, "security.capability", buf, 256); if(err<0) fprintf(stderr, "fgetxattr: %s\n", strerror(errno)); gcc compile into ~/test_fgetxattr cd /tmp cp /bin/echo . ~/test_fgetxattr Result: fgetxattr: Invalid argument On regular filesystem, for example, ext4 read xattr from disk and return to execveat(), will not trigger this issue, however, the overlay attr handler pass real dentry to vfs_getxattr() will. This reproducer calls fgetxattr() with an unlinked fd, involkes vfs_getxattr() then reproduced the case that d_find_alias() in cap_inode_getsecurity() can't find the unlinked dentry. Suggested-by: Amir Goldstein Acked-by: Amir Goldstein Acked-by: Serge E. Hallyn Fixes: 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Cc: # v4.14 Signed-off-by: Eddie Horng Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- security/commoncap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/commoncap.c b/security/commoncap.c index 1c1f64582bb5..ae26ef006988 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -388,7 +388,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, if (strcmp(name, "capability") != 0) return -EOPNOTSUPP; - dentry = d_find_alias(inode); + dentry = d_find_any_alias(inode); if (!dentry) return -EINVAL; -- GitLab From 300ec47ab8ea3434ed4b7e7cb5e8eeccc5ad9def Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 14 Aug 2018 11:46:08 +0300 Subject: [PATCH 1284/1291] perf auxtrace: Fix queue resize commit 99cbbe56eb8bede625f410ab62ba34673ffa7d21 upstream. When the number of queues grows beyond 32, the array of queues is resized but not all members were being copied. Fix by also copying 'tid', 'cpu' and 'set'. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: e502789302a6e ("perf auxtrace: Add helpers for queuing AUX area tracing data") Link: http://lkml.kernel.org/r/20180814084608.6563-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5547457566a7..bbb9823e93b9 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -197,6 +197,9 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues, for (i = 0; i < queues->nr_queues; i++) { list_splice_tail(&queues->queue_array[i].head, &queue_array[i].head); + queue_array[i].tid = queues->queue_array[i].tid; + queue_array[i].cpu = queues->queue_array[i].cpu; + queue_array[i].set = queues->queue_array[i].set; queue_array[i].priv = queues->queue_array[i].priv; } -- GitLab From 9f830cf2d510035d6d2b41e4e5fa72c9c886225b Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 22 Aug 2018 08:26:31 +0200 Subject: [PATCH 1285/1291] crypto: vmx - Fix sleep-in-atomic bugs commit 0522236d4f9c5ab2e79889cb020d1acbe5da416e upstream. This patch fixes sleep-in-atomic bugs in AES-CBC and AES-XTS VMX implementations. The problem is that the blkcipher_* functions should not be called in atomic context. The bugs can be reproduced via the AF_ALG interface by trying to encrypt/decrypt sufficiently large buffers (at least 64 KiB) using the VMX implementations of 'cbc(aes)' or 'xts(aes)'. Such operations then trigger BUG in crypto_yield(): [ 891.863680] BUG: sleeping function called from invalid context at include/crypto/algapi.h:424 [ 891.864622] in_atomic(): 1, irqs_disabled(): 0, pid: 12347, name: kcapi-enc [ 891.864739] 1 lock held by kcapi-enc/12347: [ 891.864811] #0: 00000000f5d42c46 (sk_lock-AF_ALG){+.+.}, at: skcipher_recvmsg+0x50/0x530 [ 891.865076] CPU: 5 PID: 12347 Comm: kcapi-enc Not tainted 4.19.0-0.rc0.git3.1.fc30.ppc64le #1 [ 891.865251] Call Trace: [ 891.865340] [c0000003387578c0] [c000000000d67ea4] dump_stack+0xe8/0x164 (unreliable) [ 891.865511] [c000000338757910] [c000000000172a58] ___might_sleep+0x2f8/0x310 [ 891.865679] [c000000338757990] [c0000000006bff74] blkcipher_walk_done+0x374/0x4a0 [ 891.865825] [c0000003387579e0] [d000000007e73e70] p8_aes_cbc_encrypt+0x1c8/0x260 [vmx_crypto] [ 891.865993] [c000000338757ad0] [c0000000006c0ee0] skcipher_encrypt_blkcipher+0x60/0x80 [ 891.866128] [c000000338757b10] [c0000000006ec504] skcipher_recvmsg+0x424/0x530 [ 891.866283] [c000000338757bd0] [c000000000b00654] sock_recvmsg+0x74/0xa0 [ 891.866403] [c000000338757c10] [c000000000b00f64] ___sys_recvmsg+0xf4/0x2f0 [ 891.866515] [c000000338757d90] [c000000000b02bb8] __sys_recvmsg+0x68/0xe0 [ 891.866631] [c000000338757e30] [c00000000000bbe4] system_call+0x5c/0x70 Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module") Fixes: c07f5d3da643 ("crypto: vmx - Adding support for XTS") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/vmx/aes_cbc.c | 30 ++++++++++++++---------------- drivers/crypto/vmx/aes_xts.c | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 5285ece4f33a..b71895871be3 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -107,24 +107,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; @@ -147,24 +146,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 8bd9aff0f55f..e9954a7d4694 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -116,32 +116,39 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); - - ret = blkcipher_walk_virt(desc, &walk); iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); if (enc) aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); else aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; } -- GitLab From be6f98b203d380e58e64f989fc317a2acfdc67ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:55 +0300 Subject: [PATCH 1286/1291] crypto: caam - fix DMA mapping direction for RSA forms 2 & 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f1bf9e60a0779ec97de9ecdc353e1d01cdd73f43 upstream. Crypto engine needs some temporary locations in external memory for running RSA decrypt forms 2 and 3 (CRT). These are named "tmp1" and "tmp2" in the PDB. Update DMA mapping direction of tmp1 and tmp2 from TO_DEVICE to BIDIRECTIONAL, since engine needs r/w access. Cc: # 4.13+ Fixes: 52e26d77b8b3 ("crypto: caam - add support for RSA key form 2") Fixes: 4a651b122adb ("crypto: caam - add support for RSA key form 3") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caampkc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 7ff4a25440ac..6f3f81bb880b 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -71,8 +71,8 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, @@ -90,8 +90,8 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } /* RSA Job Completion handler */ @@ -417,13 +417,13 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, goto unmap_p; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_q; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -451,7 +451,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_q: dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); unmap_p: @@ -504,13 +504,13 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, goto unmap_dq; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_qinv; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -538,7 +538,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_qinv: dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); unmap_dq: -- GitLab From ccb38942fbe180a70b773bc1dbe47d48d9458bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:09 +0300 Subject: [PATCH 1287/1291] crypto: caam/jr - fix descriptor DMA unmapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc98963dbaaea93d17608641b8d6942a5327fc31 upstream. Descriptor address needs to be swapped to CPU endianness before being DMA unmapped. Cc: # 4.8+ Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reported-by: Laurentiu Tudor Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/jr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index d258953ff488..7fa1be184553 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -190,7 +190,8 @@ static void caam_jr_dequeue(unsigned long devarg) BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0); /* Unmap just-run descriptor so we can post-process */ - dma_unmap_single(dev, jrp->outring[hw_idx].desc, + dma_unmap_single(dev, + caam_dma_to_cpu(jrp->outring[hw_idx].desc), jrp->entinfo[sw_idx].desc_size, DMA_TO_DEVICE); -- GitLab From 0682e027f39dfcdef5f5d9c264491a34196f3b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:39 +0300 Subject: [PATCH 1288/1291] crypto: caam/qi - fix error path in xts setkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ad876a18048f43b1f66f5d474b7598538668c5de upstream. xts setkey callback returns 0 on some error paths. Fix this by returning -EINVAL. Cc: # 4.12+ Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg_qi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index e7966e37a5aa..ecc6d755d3c1 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -350,10 +350,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, int ret = 0; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { - crypto_ablkcipher_set_flags(ablkcipher, - CRYPTO_TFM_RES_BAD_KEY_LEN); dev_err(jrdev, "key size mismatch\n"); - return -EINVAL; + goto badkey; } memcpy(ctx->key, key, keylen); @@ -388,7 +386,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return ret; badkey: crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return 0; + return -EINVAL; } /* -- GitLab From 3098933c12130afd50c7474ee3e7318aecce3297 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:31 +0000 Subject: [PATCH 1289/1291] fs/quota: Fix spectre gadget in do_quotactl commit 7b6924d94a60c6b8c1279ca003e8744e6cd9e8b1 upstream. 'type' is user-controlled, so sanitize it after the bounds check to avoid using it in speculative execution. This covers the following potential gadgets detected with the help of smatch: * fs/ext4/super.c:5741 ext4_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/ext4/super.c:5778 ext4_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1552 f2fs_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1608 f2fs_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/quota/dquot.c:412 mark_info_dirty() warn: potential spectre issue 'sb_dqopt(sb)->info' [w] * fs/quota/dquot.c:933 dqinit_needed() warn: potential spectre issue 'dquots' [r] * fs/quota/dquot.c:2112 dquot_commit_info() warn: potential spectre issue 'dqopt->ops' [r] * fs/quota/dquot.c:2362 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->files' [w] (local cap) * fs/quota/dquot.c:2369 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->ops' [w] (local cap) * fs/quota/dquot.c:2370 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->info' [w] (local cap) * fs/quota/quota.c:110 quota_getfmt() warn: potential spectre issue 'sb_dqopt(sb)->info' [r] * fs/quota/quota_v2.c:84 v2_check_quota_file() warn: potential spectre issue 'quota_magics' [w] * fs/quota/quota_v2.c:85 v2_check_quota_file() warn: potential spectre issue 'quota_versions' [w] * fs/quota/quota_v2.c:96 v2_read_file_info() warn: potential spectre issue 'dqopt->info' [r] * fs/quota/quota_v2.c:172 v2_write_file_info() warn: potential spectre issue 'dqopt->info' [r] Additionally, a quick inspection indicates there are array accesses with 'type' in quota_on() and quota_off() functions which are also addressed by this. Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/quota/quota.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 43612e2a73af..3f02bab0db4e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,7 @@ #include #include #include +#include static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -703,6 +704,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) return -EINVAL; + type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. -- GitLab From 29245d36680efe1b268bef0072f06bf201847abe Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 30 Aug 2018 16:05:32 +0100 Subject: [PATCH 1290/1291] arm64: mm: always enable CONFIG_HOLES_IN_ZONE commit f52bb98f5aded4c43e52f5ce19fb83f7261e9e73 upstream. Commit 6d526ee26ccd ("arm64: mm: enable CONFIG_HOLES_IN_ZONE for NUMA") only enabled HOLES_IN_ZONE for NUMA systems because the NUMA code was choking on the missing zone for nomap pages. This problem doesn't just apply to NUMA systems. If the architecture doesn't set HAVE_ARCH_PFN_VALID, pfn_valid() will return true if the pfn is part of a valid sparsemem section. When working with multiple pages, the mm code uses pfn_valid_within() to test each page it uses within the sparsemem section is valid. On most systems memory comes in MAX_ORDER_NR_PAGES chunks which all have valid/initialised struct pages. In this case pfn_valid_within() is optimised out. Systems where this isn't true (e.g. due to nomap) should set HOLES_IN_ZONE and provide HAVE_ARCH_PFN_VALID so that mm tests each page as it works with it. Currently non-NUMA arm64 systems can't enable HOLES_IN_ZONE, leading to a VM_BUG_ON(): | page:fffffdff802e1780 is uninitialized and poisoned | raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff | raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff | page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) | ------------[ cut here ]------------ | kernel BUG at include/linux/mm.h:978! | Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [...] | CPU: 1 PID: 25236 Comm: dd Not tainted 4.18.0 #7 | Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 | pstate: 40000085 (nZcv daIf -PAN -UAO) | pc : move_freepages_block+0x144/0x248 | lr : move_freepages_block+0x144/0x248 | sp : fffffe0071177680 [...] | Process dd (pid: 25236, stack limit = 0x0000000094cc07fb) | Call trace: | move_freepages_block+0x144/0x248 | steal_suitable_fallback+0x100/0x16c | get_page_from_freelist+0x440/0xb20 | __alloc_pages_nodemask+0xe8/0x838 | new_slab+0xd4/0x418 | ___slab_alloc.constprop.27+0x380/0x4a8 | __slab_alloc.isra.21.constprop.26+0x24/0x34 | kmem_cache_alloc+0xa8/0x180 | alloc_buffer_head+0x1c/0x90 | alloc_page_buffers+0x68/0xb0 | create_empty_buffers+0x20/0x1ec | create_page_buffers+0xb0/0xf0 | __block_write_begin_int+0xc4/0x564 | __block_write_begin+0x10/0x18 | block_write_begin+0x48/0xd0 | blkdev_write_begin+0x28/0x30 | generic_perform_write+0x98/0x16c | __generic_file_write_iter+0x138/0x168 | blkdev_write_iter+0x80/0xf0 | __vfs_write+0xe4/0x10c | vfs_write+0xb4/0x168 | ksys_write+0x44/0x88 | sys_write+0xc/0x14 | el0_svc_naked+0x30/0x34 | Code: aa1303e0 90001a01 91296421 94008902 (d4210000) | ---[ end trace 1601ba47f6e883fe ]--- Remove the NUMA dependency. Link: https://www.spinics.net/lists/arm-kernel/msg671851.html Cc: Cc: Ard Biesheuvel Reported-by: Mikulas Patocka Reviewed-by: Pavel Tatashin Tested-by: Mikulas Patocka Signed-off-by: James Morse Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1bbb89d37f57..c30cd78b6918 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -693,7 +693,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config HOLES_IN_ZONE def_bool y - depends on NUMA source kernel/Kconfig.preempt source kernel/Kconfig.hz -- GitLab From 7fe7a0f4c5cf9e7f5b7cb67c1341cdbf62ed4c30 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Sep 2018 19:56:02 +0200 Subject: [PATCH 1291/1291] Linux 4.14.69 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3da579058926..3ecda1d2e23a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 68 +SUBLEVEL = 69 EXTRAVERSION = NAME = Petit Gorille -- GitLab